5 # This class implements the JSON parser that is used to parse a JSON string
6 # into a Ruby data structure.
7 class Parser < StringScanner
8 STRING = /" ((?:[^\x0-\x1f"\\] |
9 # escaped special characters:
12 # match all but escaped special characters:
13 \\[\x20-\x21\x23-\x2e\x30-\x5b\x5d-\x61\x63-\x65\x67-\x6d\x6f-\x71\x73\x75-\xff])*)
15 INTEGER = /(-?0|-?[1-9]\d*)/
26 MINUS_INFINITY = /-Infinity/
32 COLLECTION_DELIMITER = /,/
38 //[^\n\r]*[\n\r]| # line comments
39 /\* # c-style comments
42 /[^*]| # slashes that do not start a nested comment
43 \*[^/]| # asterisks that do not end this comment
44 /(?=\*/) # single slash before this comment's end
46 \*/ # the End of this comment
47 |[ \t\r\n]+ # whitespaces: space, horizontal tab, lf, cr
53 # Creates a new JSON::Pure::Parser instance for the string _source_.
55 # It will be configured by the _opts_ hash. _opts_ can have the following
57 # * *max_nesting*: The maximum depth of nesting allowed in the parsed data
58 # structures. Disable depth checking with :max_nesting => false|nil|0,
60 # * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in
61 # defiance of RFC 4627 to be parsed by the Parser. This option defaults
63 # * *symbolize_names*: If set to true, returns symbols for the names
64 # (keys) in a JSON object. Otherwise strings are returned, which is also
66 # * *create_additions*: If set to true, the Parser creates
67 # additions if a matching class and create_id are found. This
68 # option defaults to false.
69 # * *object_class*: Defaults to Hash
70 # * *array_class*: Defaults to Array
71 # * *quirks_mode*: Enables quirks_mode for the parser, which makes it
72 # possible, for example, to parse single JSON values instead of documents.
73 def initialize(source, opts = {})
75 unless @quirks_mode = opts[:quirks_mode]
76 source = determine_encoding source
79 if !opts.key?(:max_nesting) # defaults to 19
81 elsif opts[:max_nesting]
82 @max_nesting = opts[:max_nesting]
86 @allow_nan = !!opts[:allow_nan]
87 @symbolize_names = !!opts[:symbolize_names]
88 if opts.key?(:create_additions)
89 @create_additions = !!opts[:create_additions]
91 @create_additions = false
93 @create_id = @create_additions ? JSON.create_id : nil
94 @object_class = opts[:object_class] || Hash
95 @array_class = opts[:array_class] || Array
96 @match_string = opts[:match_string]
110 # Parses the current JSON string _source_ and returns the complete data
111 # structure as a result.
116 while !eos? && skip(IGNORE)
119 raise ParserError, "source did not contain any JSON!"
122 obj == UNPARSED and raise ParserError, "source did not contain any JSON!"
127 when scan(OBJECT_OPEN)
128 obj and raise ParserError, "source '#{peek(20)}' not in JSON!"
131 when scan(ARRAY_OPEN)
132 obj and raise ParserError, "source '#{peek(20)}' not in JSON!"
138 raise ParserError, "source '#{peek(20)}' not in JSON!"
141 obj or raise ParserError, "source did not contain any JSON!"
148 def determine_encoding(source)
149 if defined?(::Encoding)
150 if source.encoding == ::Encoding::ASCII_8BIT
151 b = source[0, 4].bytes.to_a
154 when b.size >= 4 && b[0] == 0 && b[1] == 0 && b[2] == 0
155 source.dup.force_encoding(::Encoding::UTF_32BE).encode!(::Encoding::UTF_8)
156 when b.size >= 4 && b[0] == 0 && b[2] == 0
157 source.dup.force_encoding(::Encoding::UTF_16BE).encode!(::Encoding::UTF_8)
158 when b.size >= 4 && b[1] == 0 && b[2] == 0 && b[3] == 0
159 source.dup.force_encoding(::Encoding::UTF_32LE).encode!(::Encoding::UTF_8)
160 when b.size >= 4 && b[1] == 0 && b[3] == 0
161 source.dup.force_encoding(::Encoding::UTF_16LE).encode!(::Encoding::UTF_8)
166 source = source.encode(::Encoding::UTF_8)
168 source.force_encoding(::Encoding::ASCII_8BIT)
173 when b.size >= 4 && b[0] == 0 && b[1] == 0 && b[2] == 0
174 JSON.iconv('utf-8', 'utf-32be', b)
175 when b.size >= 4 && b[0] == 0 && b[2] == 0
176 JSON.iconv('utf-8', 'utf-16be', b)
177 when b.size >= 4 && b[1] == 0 && b[2] == 0 && b[3] == 0
178 JSON.iconv('utf-8', 'utf-32le', b)
179 when b.size >= 4 && b[1] == 0 && b[3] == 0
180 JSON.iconv('utf-8', 'utf-16le', b)
188 # Unescape characters in strings.
189 UNESCAPE_MAP = Hash.new { |h, k| h[k] = k.chr }
190 UNESCAPE_MAP.update({
202 EMPTY_8BIT_STRING = ''
203 if ::String.method_defined?(:encode)
204 EMPTY_8BIT_STRING.force_encoding Encoding::ASCII_8BIT
209 return '' if self[1].empty?
210 string = self[1].gsub(%r((?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff]))n) do |c|
211 if u = UNESCAPE_MAP[$&[1]]
214 bytes = EMPTY_8BIT_STRING.dup
216 while c[6 * i] == ?\\ && c[6 * i + 1] == ?u
217 bytes << c[6 * i + 2, 2].to_i(16) << c[6 * i + 4, 2].to_i(16)
220 JSON.iconv('utf-8', 'utf-16be', bytes)
223 if string.respond_to?(:force_encoding)
224 string.force_encoding(::Encoding::UTF_8)
226 if @create_additions and @match_string
227 for (regexp, klass) in @match_string
228 klass.json_creatable? or next
229 string =~ regexp and return klass.json_create(string)
237 raise ParserError, "Caught #{e.class} at '#{peek(20)}': #{e}"
252 when (string = parse_string) != UNPARSED
254 when scan(ARRAY_OPEN)
255 @current_nesting += 1
257 @current_nesting -= 1
259 when scan(OBJECT_OPEN)
260 @current_nesting += 1
262 @current_nesting -= 1
264 when @allow_nan && scan(NAN)
266 when @allow_nan && scan(INFINITY)
268 when @allow_nan && scan(MINUS_INFINITY)
276 raise NestingError, "nesting of #@current_nesting is too deep" if
277 @max_nesting.nonzero? && @current_nesting > @max_nesting
278 result = @array_class.new
282 when (value = parse_value) != UNPARSED
286 if scan(COLLECTION_DELIMITER)
288 elsif match?(ARRAY_CLOSE)
291 raise ParserError, "expected ',' or ']' in array at '#{peek(20)}'!"
293 when scan(ARRAY_CLOSE)
295 raise ParserError, "expected next element in array at '#{peek(20)}'!"
301 raise ParserError, "unexpected token in array at '#{peek(20)}'!"
308 raise NestingError, "nesting of #@current_nesting is too deep" if
309 @max_nesting.nonzero? && @current_nesting > @max_nesting
310 result = @object_class.new
314 when (string = parse_string) != UNPARSED
316 unless scan(PAIR_DELIMITER)
317 raise ParserError, "expected ':' in object at '#{peek(20)}'!"
320 unless (value = parse_value).equal? UNPARSED
321 result[@symbolize_names ? string.to_sym : string] = value
324 if scan(COLLECTION_DELIMITER)
326 elsif match?(OBJECT_CLOSE)
329 raise ParserError, "expected ',' or '}' in object at '#{peek(20)}'!"
332 raise ParserError, "expected value in object at '#{peek(20)}'!"
334 when scan(OBJECT_CLOSE)
336 raise ParserError, "expected next name, value pair in object at '#{peek(20)}'!"
338 if @create_additions and klassname = result[@create_id]
339 klass = JSON.deep_const_get klassname
340 break unless klass and klass.json_creatable?
341 result = klass.json_create(result)
347 raise ParserError, "unexpected token in object at '#{peek(20)}'!"