2 * This code is copyrighted work by Daniel Luz <dev at mernen dot com>.
4 * Distributed under the Ruby and GPLv2 licenses; see COPYING and GPL files
10 import org.jruby.RubyArray;
11 import org.jruby.RubyClass;
12 import org.jruby.RubyEncoding;
13 import org.jruby.RubyFloat;
14 import org.jruby.RubyHash;
15 import org.jruby.RubyInteger;
16 import org.jruby.RubyModule;
17 import org.jruby.RubyNumeric;
18 import org.jruby.RubyObject;
19 import org.jruby.RubyString;
20 import org.jruby.anno.JRubyMethod;
21 import org.jruby.exceptions.JumpException;
22 import org.jruby.exceptions.RaiseException;
23 import org.jruby.runtime.Block;
24 import org.jruby.runtime.ObjectAllocator;
25 import org.jruby.runtime.ThreadContext;
26 import org.jruby.runtime.Visibility;
27 import org.jruby.runtime.builtin.IRubyObject;
28 import org.jruby.util.ByteList;
31 * The <code>JSON::Ext::Parser</code> class.
33 * <p>This is the JSON parser implemented as a Java class. To use it as the
34 * standard parser, set
35 * <pre>JSON.parser = JSON::Ext::Parser</pre>
36 * This is performed for you when you <code>require "json/ext"</code>.
38 * <p>This class does not perform the actual parsing, just acts as an interface
39 * to Ruby code. When the {@link #parse()} method is invoked, a
40 * Parser.ParserSession object is instantiated, which handles the process.
44 public class Parser extends RubyObject {
45 private final RuntimeInfo info;
46 private RubyString vSource;
47 private RubyString createId;
48 private boolean createAdditions;
49 private int maxNesting;
50 private boolean allowNaN;
51 private boolean symbolizeNames;
52 private boolean quirksMode;
53 private RubyClass objectClass;
54 private RubyClass arrayClass;
55 private RubyHash match_string;
57 private static final int DEFAULT_MAX_NESTING = 19;
59 private static final String JSON_MINUS_INFINITY = "-Infinity";
60 // constant names in the JSON module containing those values
61 private static final String CONST_NAN = "NaN";
62 private static final String CONST_INFINITY = "Infinity";
63 private static final String CONST_MINUS_INFINITY = "MinusInfinity";
// Allocator for the Parser class: each allocate() call builds a new Parser
// for the given runtime and class. The instance is not yet initialized here;
// vSource is only assigned later, in initialize.
65 static final ObjectAllocator ALLOCATOR = new ObjectAllocator() {
66 public IRubyObject allocate(Ruby runtime, RubyClass klazz) {
67 return new Parser(runtime, klazz);
72 * Multiple-value return for internal parser methods.
74 * <p>All the <code>parse<var>Stuff</var></code> methods return instances of
75 * <code>ParserResult</code> when successful, or <code>null</code> when
76 * there's a problem with the input data.
78 static final class ParserResult {
80 * The result of the successful parsing. Should never be
83 final IRubyObject result;
85 * The point where the parser returned.
89 ParserResult(IRubyObject result, int p) {
// Creates a parser object that has no source yet: only the RuntimeInfo is
// resolved here, while vSource and the option fields are set by initialize.
95 public Parser(Ruby runtime, RubyClass metaClass) {
96 super(runtime, metaClass);
// Looked up once per instance; the field is final.
97 info = RuntimeInfo.forRuntime(runtime);
101 * <code>Parser.new(source, opts = {})</code>
103 * <p>Creates a new <code>JSON::Ext::Parser</code> instance for the string
104 * <code>source</code>.
105 * It will be configured by the <code>opts</code> Hash.
106 * <code>opts</code> can have the following keys:
109 * <dt><code>:max_nesting</code>
110 * <dd>The maximum depth of nesting allowed in the parsed data
111 * structures. Disable depth checking with <code>:max_nesting => false|nil|0</code>,
114 * <dt><code>:allow_nan</code>
115 * <dd>If set to <code>true</code>, allow <code>NaN</code>,
116 * <code>Infinity</code> and <code>-Infinity</code> in defiance of RFC 4627
117 * to be parsed by the Parser. This option defaults to <code>false</code>.
119 * <dt><code>:symbolize_names</code>
120 * <dd>If set to <code>true</code>, returns symbols for the names (keys) in
121 * a JSON object. Otherwise strings are returned, which is also the default.
123 * <dt><code>:quirks_mode?</code>
124 * <dd>If set to <code>true</code>, if the parse is in quirks_mode, false
127 * <dt><code>:create_additions</code>
128 * <dd>If set to <code>false</code>, the Parser doesn't create additions
129 * even if a matching class and <code>create_id</code> was found. This option
130 * defaults to <code>false</code>.
132 * <dt><code>:object_class</code>
133 * <dd>Defaults to Hash.
135 * <dt><code>:array_class</code>
136 * <dd>Defaults to Array.
138 * <dt><code>:quirks_mode</code>
139 * <dd>Enables quirks_mode for parser, that is for example parsing single
140 * JSON values instead of documents is possible.
143 @JRubyMethod(name = "new", required = 1, optional = 1, meta = true)
144 public static IRubyObject newInstance(IRubyObject clazz, IRubyObject[] args, Block block) {
// Allocate through the receiving class (treated as a RubyClass); the
// allocated object is expected to be a Parser, hence the cast.
145 Parser parser = (Parser)((RubyClass)clazz).allocate();
// Forward the caller's arguments and block to callInit.
// NOTE(review): presumably this dispatches to initialize below; confirm.
147 parser.callInit(args, block);
152 @JRubyMethod(required = 1, optional = 1, visibility = Visibility.PRIVATE)
153 public IRubyObject initialize(ThreadContext context, IRubyObject[] args) {
154 Ruby runtime = context.getRuntime();
// vSource doubles as the "already initialized" marker: it is null until the
// end of this method, so a non-null value means initialize ran before.
155 if (this.vSource != null) {
156 throw runtime.newTypeError("already initialized instance");
// The options hash is optional; null is handed to OptionsReader when only
// the source argument was given.
159 OptionsReader opts = new OptionsReader(context, args.length > 1 ? args[1] : null);
// NOTE(review): the second argument of each getter is presumably the value
// used when the key is absent; confirm against OptionsReader.
160 this.maxNesting = opts.getInt("max_nesting", DEFAULT_MAX_NESTING);
161 this.allowNaN = opts.getBool("allow_nan", false);
162 this.symbolizeNames = opts.getBool("symbolize_names", false);
163 this.quirksMode = opts.getBool("quirks_mode", false);
// The fallback for create_id is whatever JSON.create_id currently returns
// (null when it is nil/false, see getCreateId).
164 this.createId = opts.getString("create_id", getCreateId(context));
// create_additions is read with false as its second argument.
165 this.createAdditions = opts.getBool("create_additions", false);
166 this.objectClass = opts.getClass("object_class", runtime.getHash());
167 this.arrayClass = opts.getClass("array_class", runtime.getArray());
// match_string is read without a fallback argument; parseString treats a
// null value as "no string matching configured".
168 this.match_string = opts.getHash("match_string");
// The source is coerced with convertToString; outside quirks mode it is
// additionally passed through convertEncoding.
170 this.vSource = args[0].convertToString();
171 if (!quirksMode) this.vSource = convertEncoding(context, vSource);
177 * Checks the given string's encoding. If a non-UTF-8 encoding is detected,
178 * a converted copy is returned.
179 * Returns the source string if no conversion is needed.
181 private RubyString convertEncoding(ThreadContext context, RubyString source) {
182 ByteList bl = source.getByteList();
183 int len = bl.length();
// NOTE(review): the condition guarding this throw is not visible in this
// excerpt; the message implies a minimum length of two bytes. Confirm.
185 throw Utils.newException(context, Utils.M_PARSER_ERROR,
186 "A JSON text must at least contain two octets!");
// When the runtime reports encoding support: a string whose encoding is not
// the ASCII-8BIT (binary) one is transcoded to UTF-8 via encode. A binary
// string is sniffed instead and, if an encoding is detected, reinterpreted.
189 if (info.encodingsSupported()) {
190 RubyEncoding encoding = (RubyEncoding)source.encoding(context);
191 if (encoding != info.ascii8bit.get()) {
192 return (RubyString)source.encode(context, info.utf8.get());
195 String sniffedEncoding = sniffByteList(bl);
196 if (sniffedEncoding == null) return source; // assume UTF-8
197 return reinterpretEncoding(context, source, sniffedEncoding);
// NOTE(review): the second sniffedEncoding declaration implies the lines
// below sit outside the branch above, i.e. they run only without encoding
// support; the closing brace is not visible here. In that path the
// conversion is delegated to the JSON module's "iconv" method.
200 String sniffedEncoding = sniffByteList(bl);
201 if (sniffedEncoding == null) return source; // assume UTF-8
202 Ruby runtime = context.getRuntime();
203 return (RubyString)info.jsonModule.get().
204 callMethod(context, "iconv",
206 runtime.newString("utf-8"),
207 runtime.newString(sniffedEncoding),
212 * Checks the first four bytes of the given ByteList to infer its encoding,
213 * using the principle demonstrated on section 3 of RFC 4627 (JSON).
215 private static String sniffByteList(ByteList bl) {
// Indices 0..3 are inspected below, so shorter inputs yield no guess (null).
216 if (bl.length() < 4) return null;
// Zero bytes at indices 0 and 2 select a big-endian encoding: UTF-32 when
// index 1 is zero as well, UTF-16 otherwise.
217 if (bl.get(0) == 0 && bl.get(2) == 0) {
218 return bl.get(1) == 0 ? "utf-32be" : "utf-16be";
// Zero bytes at indices 1 and 3 select a little-endian encoding: UTF-32 when
// index 2 is zero as well, UTF-16 otherwise.
220 if (bl.get(1) == 0 && bl.get(3) == 0) {
221 return bl.get(2) == 0 ? "utf-32le" : "utf-16le";
227 * Assumes the given (binary) RubyString to be in the given encoding, then
228 * converts it to UTF-8.
230 private RubyString reinterpretEncoding(ThreadContext context,
231 RubyString str, String sniffedEncoding) {
// The sniffed encoding name is resolved through RuntimeInfo; the conversion
// target is the UTF-8 encoding held by RuntimeInfo.
232 RubyEncoding actualEncoding = info.getEncoding(context, sniffedEncoding);
233 RubyEncoding targetEncoding = info.utf8.get();
// force_encoding and encode_bang are applied to a duplicate, not to the
// string that was passed in.
234 RubyString dup = (RubyString)str.dup();
235 dup.force_encoding(context, actualEncoding);
236 return (RubyString)dup.encode_bang(context, targetEncoding);
240 * <code>Parser#parse()</code>
242 * <p>Parses the current JSON text <code>source</code> and returns the
243 * complete data structure as a result.
246 public IRubyObject parse(ThreadContext context) {
// Every call creates a new ParserSession for this parser and the given
// context, and returns whatever that session's parse() produces.
247 return new ParserSession(this, context).parse();
251 * <code>Parser#source()</code>
253 * <p>Returns a copy of the current <code>source</code> string, that was
254 * used to construct this Parser.
256 @JRubyMethod(name = "source")
257 public IRubyObject source_get() {
// Hands out a dup of the stored source rather than the stored string itself;
// checkAndGetSource raises a TypeError when the instance is uninitialized.
258 return checkAndGetSource().dup();
262 * <code>Parser#quirks_mode?()</code>
264 * <p>Returns <code>true</code> if the parser is in quirks_mode, <code>false</code>
267 @JRubyMethod(name = "quirks_mode?")
268 public IRubyObject quirks_mode_p(ThreadContext context) {
// Wraps the quirksMode field (set in initialize) in a Ruby boolean.
269 return context.getRuntime().newBoolean(quirksMode);
// Accessor for the source string that refuses to work on an instance whose
// initialize has not assigned vSource yet.
272 public RubyString checkAndGetSource() {
273 if (vSource != null) {
// Reached when vSource is still null: reported as a Ruby TypeError.
276 throw getRuntime().newTypeError("uninitialized instance");
281 * Queries <code>JSON.create_id</code>. Returns <code>null</code> if it is
282 * set to <code>nil</code> or <code>false</code>, and a String if not.
284 private RubyString getCreateId(ThreadContext context) {
// Calls create_id on the JSON module held by RuntimeInfo.
285 IRubyObject v = info.jsonModule.get().callMethod(context, "create_id");
// A value that is not true in the Ruby sense maps to Java null; anything
// else is coerced with convertToString.
286 return v.isTrue() ? v.convertToString() : null;
290 * A string parsing session.
292 * <p>Once a ParserSession is instantiated, the source string should not
293 * change until the parsing is complete. The ParserSession object assumes
294 * the source {@link RubyString} is still associated to its original
295 * {@link ByteList}, which in turn must still be bound to the same
296 * <code>byte[]</code> value (and on the same offset).
298 // Ragel uses lots of fall-through
299 @SuppressWarnings("fallthrough")
300 private static class ParserSession {
301 private final Parser parser;
302 private final ThreadContext context;
303 private final ByteList byteList;
304 private final byte[] data;
305 private final StringDecoder decoder;
306 private int currentNesting = 0;
308 // initialization value for all state variables.
309 // no idea about the origins of this value, ask Flori ;)
310 private static final int EVIL = 0x666;
// Captures everything one parsing run needs. checkAndGetSource() raises a
// TypeError here if the parser was never initialized.
312 private ParserSession(Parser parser, ThreadContext context) {
313 this.parser = parser;
314 this.context = context;
315 this.byteList = parser.checkAndGetSource().getByteList();
// The session reads the ByteList's byte[] directly, which is why the source
// must keep the same array and offset while this session is in use.
316 this.data = byteList.unsafeBytes();
317 this.decoder = new StringDecoder(context);
// Builds (but does not throw) a parser error whose message starts with
// "unexpected token at '" followed by the bytes of data from absStart up to
// absEnd. Both arguments index into the data array directly. Callers write
// "throw unexpectedToken(...)".
320 private RaiseException unexpectedToken(int absStart, int absEnd) {
321 RubyString msg = getRuntime().newString("unexpected token at '")
322 .cat(data, absStart, absEnd - absStart)
324 return newException(Utils.M_PARSER_ERROR, msg);
// Shorthand for the runtime of the ThreadContext this session was created with.
327 private Ruby getRuntime() {
328 return context.getRuntime();
337 c_comment = '/*' ( any* - (any* '*/' any* ) ) '*/';
338 cpp_comment = '//' cr_neg* cr;
339 comment = c_comment | cpp_comment;
340 ignore = ws | comment;
341 name_separator = ':';
342 value_separator = ',';
347 VInfinity = 'Infinity';
348 VMinusInfinity = '-Infinity';
349 begin_value = [nft"\-[{NI] | digit;
355 begin_name = begin_string;
356 begin_number = digit | '-';
366 result = getRuntime().getNil();
369 result = getRuntime().getFalse();
372 result = getRuntime().getTrue();
375 if (parser.allowNaN) {
376 result = getConstant(CONST_NAN);
378 throw unexpectedToken(p - 2, pe);
381 action parse_infinity {
382 if (parser.allowNaN) {
383 result = getConstant(CONST_INFINITY);
385 throw unexpectedToken(p - 7, pe);
388 action parse_number {
389 if (pe > fpc + 9 - (parser.quirksMode ? 1 : 0) &&
390 absSubSequence(fpc, fpc + 9).toString().equals(JSON_MINUS_INFINITY)) {
392 if (parser.allowNaN) {
393 result = getConstant(CONST_MINUS_INFINITY);
398 throw unexpectedToken(p, pe);
401 ParserResult res = parseFloat(fpc, pe);
406 res = parseInteger(fpc, pe);
414 action parse_string {
415 ParserResult res = parseString(fpc, pe);
426 ParserResult res = parseArray(fpc, pe);
436 action parse_object {
438 ParserResult res = parseObject(fpc, pe);
453 main := ( Vnull @parse_null |
454 Vfalse @parse_false |
457 VInfinity @parse_infinity |
458 begin_number >parse_number |
459 begin_string >parse_string |
460 begin_array >parse_array |
461 begin_object >parse_object
465 ParserResult parseValue(int p, int pe) {
467 IRubyObject result = null;
// A ParserResult is only produced when the machine state cs is at or past
// the first final state AND a result was actually stored; the position
// returned is p as left by the machine.
472 if (cs >= JSON_value_first_final && result != null) {
473 return new ParserResult(result, p);
480 machine JSON_integer;
489 main := '-'? ( '0' | [1-9][0-9]* ) ( ^[0-9]? @exit );
492 ParserResult parseInteger(int p, int pe) {
// A state below the first final state means the input did not match the
// integer machine.
// NOTE(review): the statement inside this branch is not visible here.
499 if (cs < JSON_integer_first_final) {
// The integer text spans memo..p in absolute array offsets.
// NOTE(review): memo is assigned outside the lines visible here.
503 ByteList num = absSubSequence(memo, p);
504 // note: this is actually a shared string, but since it is temporary and
505 // read-only, it doesn't really matter
506 RubyString expr = RubyString.newStringLight(getRuntime(), num);
// Converted with base 10.
// NOTE(review): the trailing 'true' is presumably a strict-check flag;
// confirm against RubyNumeric.str2inum.
507 RubyInteger number = RubyNumeric.str2inum(getRuntime(), expr, 10, true);
// The reported position is one past p.
508 return new ParserResult(number, p + 1);
523 ( ( ( '0' | [1-9][0-9]* ) '.' [0-9]+ ( [Ee] [+\-]?[0-9]+ )? )
524 | ( ( '0' | [1-9][0-9]* ) ( [Ee] [+\-]? [0-9]+ ) ) )
525 ( ^[0-9Ee.\-]? @exit );
528 ParserResult parseFloat(int p, int pe) {
// A state below the first final state means the input did not match the
// float machine.
// NOTE(review): the statement inside this branch is not visible here.
535 if (cs < JSON_float_first_final) {
// The float text spans memo..p in absolute array offsets.
// NOTE(review): memo is assigned outside the lines visible here.
539 ByteList num = absSubSequence(memo, p);
540 // note: this is actually a shared string, but since it is temporary and
541 // read-only, it doesn't really matter
542 RubyString expr = RubyString.newStringLight(getRuntime(), num);
// NOTE(review): the trailing 'true' is presumably a strict-check flag;
// confirm against RubyNumeric.str2fnum.
543 RubyFloat number = RubyNumeric.str2fnum(getRuntime(), expr, true);
// The reported position is one past p.
544 return new ParserResult(number, p + 1);
553 action parse_string {
554 int offset = byteList.begin();
555 ByteList decoded = decoder.decode(byteList, memo + 1 - offset,
557 result = getRuntime().newString(decoded);
558 if (result == null) {
574 | '\\u'[0-9a-fA-F]{4}
575 | '\\'^(["\\/bfnrtu]|0..0x1f)
580 ParserResult parseString(int p, int pe) {
582 IRubyObject result = null;
// String-based additions are attempted only when create_additions is
// enabled and a match_string hash was supplied in the options.
588 if (parser.createAdditions) {
589 RubyHash match_string = parser.match_string;
590 if (match_string != null) {
// Two-slot array shared with the anonymous visitor: slot 0 carries the
// decoded string in, slot 1 receives the value of the matching entry.
591 final IRubyObject[] memoArray = { result, null };
593 match_string.visitAll(new RubyHash.Visitor() {
595 public void visit(IRubyObject pattern, IRubyObject klass) {
// Each key is tested against the string with Ruby's === operator.
596 if (pattern.callMethod(context, "===", memoArray[0]).isTrue()) {
597 memoArray[1] = klass;
// Thrown to leave visitAll after the first match; the JumpException is
// caught and deliberately ignored right after the visitAll call.
598 throw JumpException.SPECIAL_JUMP;
602 } catch (JumpException e) { }
603 if (memoArray[1] != null) {
604 RubyClass klass = (RubyClass) memoArray[1];
// The matched class replaces the string via json_create only if it responds
// to json_creatable? and that call returns a true value.
605 if (klass.respondsTo("json_creatable?") &&
606 klass.callMethod(context, "json_creatable?").isTrue()) {
607 result = klass.callMethod(context, "json_create", result);
// Success requires a final machine state and a non-null result; the
// reported position is one past p.
613 if (cs >= JSON_string_first_final && result != null) {
614 return new ParserResult(result, p + 1);
627 ParserResult res = parseValue(fpc, pe);
632 if (!parser.arrayClass.getName().equals("Array")) {
633 result.callMethod(context, "<<", res.result);
635 result.append(res.result);
646 next_element = value_separator ignore* begin_value >parse_value;
650 ( ( begin_value >parse_value
659 ParserResult parseArray(int p, int pe) {
// The depth check only applies for a positive maxNesting; zero or a negative
// value disables it. Exceeding the limit raises a nesting error.
662 if (parser.maxNesting > 0 && currentNesting > parser.maxNesting) {
663 throw newException(Utils.M_NESTING_ERROR,
664 "nesting of " + currentNesting + " is too deep");
667 // this is guaranteed to be a RubyArray due to the earlier
668 // allocator test at OptionsReader#getClass
670 (RubyArray)parser.arrayClass.newInstance(context,
671 IRubyObject.NULL_ARRAY, Block.NULL_BLOCK);
// Reaching a final state returns the array with a position one past p; the
// throw below reports the remaining input as an unexpected token.
676 if (cs >= JSON_array_first_final) {
677 return new ParserResult(result, p + 1);
679 throw unexpectedToken(p, pe);
690 ParserResult res = parseValue(fpc, pe);
695 if (!parser.objectClass.getName().equals("Hash")) {
696 result.callMethod(context, "[]=", new IRubyObject[] { lastName, res.result });
698 result.op_aset(context, lastName, res.result);
705 ParserResult res = parseString(fpc, pe);
710 RubyString name = (RubyString)res.result;
711 if (parser.symbolizeNames) {
712 lastName = context.getRuntime().is1_9()
727 pair = ignore* begin_name >parse_name ignore* name_separator
728 ignore* begin_value >parse_value;
729 next_pair = ignore* value_separator pair;
732 begin_object (pair (next_pair)*)? ignore* end_object
736 ParserResult parseObject(int p, int pe) {
738 IRubyObject lastName = null;
// The depth check only applies for a positive maxNesting; zero or a negative
// value disables it. Exceeding the limit raises a nesting error.
740 if (parser.maxNesting > 0 && currentNesting > parser.maxNesting) {
741 throw newException(Utils.M_NESTING_ERROR,
742 "nesting of " + currentNesting + " is too deep");
745 // this is guaranteed to be a RubyHash due to the earlier
746 // allocator test at OptionsReader#getClass
748 (RubyHash)parser.objectClass.newInstance(context,
749 IRubyObject.NULL_ARRAY, Block.NULL_BLOCK);
// A state below the first final state means the input did not match the
// object machine.
// NOTE(review): the statement inside this branch is not visible here.
754 if (cs < JSON_object_first_final) {
// By default the parsed hash itself is returned; it is replaced below only
// when an addition class accepts it.
758 IRubyObject returnedResult = result;
760 // attempt to de-serialize object
// NOTE(review): parser.createId may be null (getCreateId returns null for a
// nil/false JSON.create_id); confirm op_aref tolerates a null key.
761 if (parser.createAdditions) {
762 IRubyObject vKlassName = result.op_aref(context, parser.createId);
763 if (!vKlassName.isNil()) {
764 // might throw ArgumentError, we let it propagate
765 IRubyObject klass = parser.info.jsonModule.get().
766 callMethod(context, "deep_const_get", vKlassName);
// The resolved class builds the final value via json_create only if it
// responds to json_creatable? and that call returns a true value.
767 if (klass.respondsTo("json_creatable?") &&
768 klass.callMethod(context, "json_creatable?").isTrue()) {
770 returnedResult = klass.callMethod(context, "json_create", result);
// The reported position is one past p.
774 return new ParserResult(returnedResult, p + 1);
783 action parse_object {
785 ParserResult res = parseObject(fpc, pe);
797 ParserResult res = parseArray(fpc, pe);
808 ( begin_object >parse_object
809 | begin_array >parse_array )
813 public IRubyObject parseStrict() {
816 IRubyObject result = null;
// p and pe are absolute offsets into the data array: parsing starts at the
// ByteList's begin offset and ends begin + length bytes later.
819 p = byteList.begin();
820 pe = p + byteList.length();
// Success requires both a final machine state and that all input was
// consumed (p == pe); the throw below reports the remainder from p.
823 if (cs >= JSON_first_final && p == pe) {
826 throw unexpectedToken(p, pe);
831 machine JSON_quirks_mode;
837 ParserResult res = parseValue(fpc, pe);
848 ( begin_value >parse_value)
852 public IRubyObject parseQuirksMode() {
855 IRubyObject result = null;
// p and pe are absolute offsets into the data array: parsing starts at the
// ByteList's begin offset and ends begin + length bytes later.
858 p = byteList.begin();
859 pe = p + byteList.length();
// Success requires both a final machine state and that all input was
// consumed (p == pe); the throw below reports the remainder from p.
862 if (cs >= JSON_quirks_mode_first_final && p == pe) {
865 throw unexpectedToken(p, pe);
// Entry point of a session: picks the quirks-mode machine when the parser
// was configured with quirks_mode, and the strict machine otherwise.
869 public IRubyObject parse() {
870 if (parser.quirksMode) {
871 return parseQuirksMode();
873 return parseStrict();
879 * Returns a subsequence of the source ByteList, based on source
880 * array byte offsets (i.e., the ByteList's own begin offset is not
881 * automatically added).
885 private ByteList absSubSequence(int absStart, int absEnd) {
// The arguments are absolute array offsets, while subSequence is given
// ByteList-relative ones, so the begin offset is subtracted first.
886 int offset = byteList.begin();
887 return (ByteList)byteList.subSequence(absStart - offset,
892 * Retrieves a constant directly descended from the <code>JSON</code> module.
893 * @param name The constant name
895 private IRubyObject getConstant(String name) {
// Looked up on the JSON module held by the parser's RuntimeInfo.
896 return parser.info.jsonModule.get().getConstant(name);
// Convenience overloads that create (not throw) an exception of the named
// class through Utils.newException, using this session's ThreadContext.
899 private RaiseException newException(String className, String message) {
900 return Utils.newException(context, className, message);
// Same as above, for a message that already is a RubyString.
903 private RaiseException newException(String className, RubyString message) {
904 return Utils.newException(context, className, message);
// Builds the message by appending the bytes of messageEnd to messageBegin,
// then delegates to the RubyString overload.
907 private RaiseException newException(String className,
908 String messageBegin, ByteList messageEnd) {
909 return newException(className,
910 getRuntime().newString(messageBegin).cat(messageEnd));