lib/mcollective/vendor/json/java/src/json/ext/StringDecoder.java

   1 /*
   2  * This code is copyrighted work by Daniel Luz <dev at mernen dot com>.
   3  *
   4  * Distributed under the Ruby and GPLv2 licenses; see COPYING and GPL files
   5  * for details.
   6  */
   7 package json.ext;
   8
   9 import org.jruby.exceptions.RaiseException;
  10 import org.jruby.runtime.ThreadContext;
  11 import org.jruby.util.ByteList;
  12
  13 /**
  14  * A decoder that reads a JSON-encoded string from the given sources and
  15  * returns its decoded form on a new ByteList. Escaped Unicode characters
  16  * are encoded as UTF-8.
  17  */
  18 final class StringDecoder extends ByteListTranscoder {
  19     /**
  20      * Stores the offset of the high surrogate when reading a surrogate pair,
  21      * or -1 when not.
  22      */
  23     private int surrogatePairStart = -1;
  24
  25     // Array used for writing multi-byte characters into the buffer at once
  26     private final byte[] aux = new byte[4];
  27
  28     StringDecoder(ThreadContext context) {
  29         super(context);
  30     }
  31
  32     ByteList decode(ByteList src, int start, int end) {
  33         ByteList out = new ByteList(end - start);
  34         init(src, start, end, out);
  35         while (hasNext()) {
  36             handleChar(readUtf8Char());
  37         }
  38         quoteStop(pos);
  39         return out;
  40     }
  41
  42     private void handleChar(int c) {
  43         if (c == '\\') {
  44             quoteStop(charStart);
  45             handleEscapeSequence();
  46         } else {
  47             quoteStart();
  48         }
  49     }
  50
  51     private void handleEscapeSequence() {
  52         ensureMin(1);
  53         switch (readUtf8Char()) {
  54         case 'b':
  55             append('\b');
  56             break;
  57         case 'f':
  58             append('\f');
  59             break;
  60         case 'n':
  61             append('\n');
  62             break;
  63         case 'r':
  64             append('\r');
  65             break;
  66         case 't':
  67             append('\t');
  68             break;
  69         case 'u':
  70             ensureMin(4);
  71             int cp = readHex();
  72             if (Character.isHighSurrogate((char)cp)) {
  73                 handleLowSurrogate((char)cp);
  74             } else if (Character.isLowSurrogate((char)cp)) {
  75                 // low surrogate with no high surrogate
  76                 throw invalidUtf8();
  77             } else {
  78                 writeUtf8Char(cp);
  79             }
  80             break;
  81         default: // '\\', '"', '/'...
  82             quoteStart();
  83         }
  84     }
  85
  86     private void handleLowSurrogate(char highSurrogate) {
  87         surrogatePairStart = charStart;
  88         ensureMin(1);
  89         int lowSurrogate = readUtf8Char();
  90
  91         if (lowSurrogate == '\\') {
  92             ensureMin(5);
  93             if (readUtf8Char() != 'u') throw invalidUtf8();
  94             lowSurrogate = readHex();
  95         }
  96
  97         if (Character.isLowSurrogate((char)lowSurrogate)) {
  98             writeUtf8Char(Character.toCodePoint(highSurrogate,
  99                                                 (char)lowSurrogate));
 100             surrogatePairStart = -1;
 101         } else {
 102             throw invalidUtf8();
 103         }
 104     }
 105
 106     private void writeUtf8Char(int codePoint) {
 107         if (codePoint < 0x80) {
 108             append(codePoint);
 109         } else if (codePoint < 0x800) {
 110             aux[0] = (byte)(0xc0 | (codePoint >>> 6));
 111             aux[1] = tailByte(codePoint & 0x3f);
 112             append(aux, 0, 2);
 113         } else if (codePoint < 0x10000) {
 114             aux[0] = (byte)(0xe0 | (codePoint >>> 12));
 115             aux[1] = tailByte(codePoint >>> 6);
 116             aux[2] = tailByte(codePoint);
 117             append(aux, 0, 3);
 118         } else {
 119             aux[0] = (byte)(0xf0 | codePoint >>> 18);
 120             aux[1] = tailByte(codePoint >>> 12);
 121             aux[2] = tailByte(codePoint >>> 6);
 122             aux[3] = tailByte(codePoint);
 123             append(aux, 0, 4);
 124         }
 125     }
 126
 127     private byte tailByte(int value) {
 128         return (byte)(0x80 | (value & 0x3f));
 129     }
 130
 131     /**
 132      * Reads a 4-digit unsigned hexadecimal number from the source.
 133      */
 134     private int readHex() {
 135         int numberStart = pos;
 136         int result = 0;
 137         int length = 4;
 138         for (int i = 0; i < length; i++) {
 139             int digit = readUtf8Char();
 140             int digitValue;
 141             if (digit >= '0' && digit <= '9') {
 142                 digitValue = digit - '0';
 143             } else if (digit >= 'a' && digit <= 'f') {
 144                 digitValue = 10 + digit - 'a';
 145             } else if (digit >= 'A' && digit <= 'F') {
 146                 digitValue = 10 + digit - 'A';
 147             } else {
 148                 throw new NumberFormatException("Invalid base 16 number "
 149                         + src.subSequence(numberStart, numberStart + length));
 150             }
 151             result = result * 16 + digitValue;
 152         }
 153         return result;
 154     }
 155
 156     @Override
 157     protected RaiseException invalidUtf8() {
 158         ByteList message = new ByteList(
 159                 ByteList.plain("partial character in source, " +
 160                                "but hit end near "));
 161         int start = surrogatePairStart != -1 ? surrogatePairStart : charStart;
 162         message.append(src, start, srcEnd - start);
 163         return Utils.newException(context, Utils.M_PARSER_ERROR,
 164                                   context.getRuntime().newString(message));
 165     }
 166 }