lib/mcollective/vendor/json/java/src/json/ext/ByteListTranscoder.java

   1 /*
   2  * This code is copyrighted work by Daniel Luz <dev at mernen dot com>.
   3  *
   4  * Distributed under the Ruby and GPLv2 licenses; see COPYING and GPL files
   5  * for details.
   6  */
   7 package json.ext;
   8
   9 import org.jruby.exceptions.RaiseException;
  10 import org.jruby.runtime.ThreadContext;
  11 import org.jruby.util.ByteList;
  12
  13 /**
  14  * A class specialized in transcoding a certain String format into another,
  15  * using UTF-8 ByteLists as both input and output.
  16  */
  17 abstract class ByteListTranscoder {
  18     protected final ThreadContext context;
  19
  20     protected ByteList src;
  21     protected int srcEnd;
  22     /** Position where the last read character started */
  23     protected int charStart;
  24     /** Position of the next character to read */
  25     protected int pos;
  26
  27     private ByteList out;
  28     /**
  29      * When a character that can be copied straight into the output is found,
  30      * its index is stored on this variable, and copying is delayed until
  31      * the sequence of characters that can be copied ends.
  32      *
  33      * <p>The variable stores -1 when not in a plain sequence.
  34      */
  35     private int quoteStart = -1;
  36
  37     protected ByteListTranscoder(ThreadContext context) {
  38         this.context = context;
  39     }
  40
  41     protected void init(ByteList src, ByteList out) {
  42         this.init(src, 0, src.length(), out);
  43     }
  44
  45     protected void init(ByteList src, int start, int end, ByteList out) {
  46         this.src = src;
  47         this.pos = start;
  48         this.charStart = start;
  49         this.srcEnd = end;
  50         this.out = out;
  51     }
  52
  53     /**
  54      * Returns whether there are any characters left to be read.
  55      */
  56     protected boolean hasNext() {
  57         return pos < srcEnd;
  58     }
  59
  60     /**
  61      * Returns the next character in the buffer.
  62      */
  63     private char next() {
  64         return src.charAt(pos++);
  65     }
  66
  67     /**
  68      * Reads an UTF-8 character from the input and returns its code point,
  69      * while advancing the input position.
  70      *
  71      * <p>Raises an {@link #invalidUtf8()} exception if an invalid byte
  72      * is found.
  73      */
  74     protected int readUtf8Char() {
  75         charStart = pos;
  76         char head = next();
  77         if (head <= 0x7f) { // 0b0xxxxxxx (ASCII)
  78             return head;
  79         }
  80         if (head <= 0xbf) { // 0b10xxxxxx
  81             throw invalidUtf8(); // tail byte with no head
  82         }
  83         if (head <= 0xdf) { // 0b110xxxxx
  84             ensureMin(1);
  85             int cp = ((head  & 0x1f) << 6)
  86                      | nextPart();
  87             if (cp < 0x0080) throw invalidUtf8();
  88             return cp;
  89         }
  90         if (head <= 0xef) { // 0b1110xxxx
  91             ensureMin(2);
  92             int cp = ((head & 0x0f) << 12)
  93                      | (nextPart()  << 6)
  94                      | nextPart();
  95             if (cp < 0x0800) throw invalidUtf8();
  96             return cp;
  97         }
  98         if (head <= 0xf7) { // 0b11110xxx
  99             ensureMin(3);
 100             int cp = ((head & 0x07) << 18)
 101                      | (nextPart()  << 12)
 102                      | (nextPart()  << 6)
 103                      | nextPart();
 104             if (!Character.isValidCodePoint(cp)) throw invalidUtf8();
 105             return cp;
 106         }
 107         // 0b11111xxx?
 108         throw invalidUtf8();
 109     }
 110
 111     /**
 112      * Throws a GeneratorError if the input list doesn't have at least this
 113      * many bytes left.
 114      */
 115     protected void ensureMin(int n) {
 116         if (pos + n > srcEnd) throw incompleteUtf8();
 117     }
 118
 119     /**
 120      * Reads the next byte of a multi-byte UTF-8 character and returns its
 121      * contents (lower 6 bits).
 122      *
 123      * <p>Throws a GeneratorError if the byte is not a valid tail.
 124      */
 125     private int nextPart() {
 126         char c = next();
 127         // tail bytes must be 0b10xxxxxx
 128         if ((c & 0xc0) != 0x80) throw invalidUtf8();
 129         return c & 0x3f;
 130     }
 131
 132
 133     protected void quoteStart() {
 134         if (quoteStart == -1) quoteStart = charStart;
 135     }
 136
 137     /**
 138      * When in a sequence of characters that can be copied directly,
 139      * interrupts the sequence and copies it to the output buffer.
 140      *
 141      * @param endPos The offset until which the direct character quoting should
 142      *               occur. You may pass {@link #pos} to quote until the most
 143      *               recently read character, or {@link #charStart} to quote
 144      *               until the character before it.
 145      */
 146     protected void quoteStop(int endPos) {
 147         if (quoteStart != -1) {
 148             out.append(src, quoteStart, endPos - quoteStart);
 149             quoteStart = -1;
 150         }
 151     }
 152
 153     protected void append(int b) {
 154         out.append(b);
 155     }
 156
 157     protected void append(byte[] origin, int start, int length) {
 158         out.append(origin, start, length);
 159     }
 160
 161
 162     protected abstract RaiseException invalidUtf8();
 163
 164     protected RaiseException incompleteUtf8() {
 165         return invalidUtf8();
 166     }
 167 }