3 import org.jruby.exceptions.RaiseException;
4 import org.jruby.runtime.ThreadContext;
5 import org.jruby.util.ByteList;
8 * An encoder that reads from the given source and outputs its representation
9 * to another ByteList. The source string is fully checked for UTF-8 validity,
10 * and a GeneratorError is raised if any problem is found.
12 final class StringEncoder extends ByteListTranscoder {
// Assigned once from the constructor argument of the same name and read by
// handleChar, where it only matters for code points >= 0x80.
// NOTE(review): presumably `true` means non-ASCII code points must be
// escaped rather than passed through -- confirm against handleChar's body.
13 private final boolean asciiOnly;
15 // Escaped characters will reuse this array, to avoid new allocations
16 // or appending them byte-by-byte
// Layout, matching the ESCAPE_*_OFFSET constants: bytes 0-5 are the first
// unicode escape (backslash, 'u', four digit slots) and bytes 6-11 the second.
// The zero placeholders are overwritten with hex digits by escapeCodeUnit
// before the array is appended to the output.
// NOTE(review): escapeChar appends two bytes starting at ESCAPE_CHAR_OFFSET
// (12), so the initializer has to continue past the entries shown here --
// confirm that the trailing entries are present.
17 private final byte[] aux =
18 new byte[] {/* First unicode character */
19 '\\', 'u', 0, 0, 0, 0,
20 /* Second unicode character (for surrogate pairs) */
21 '\\', 'u', 0, 0, 0, 0,
24 // offsets on the array above
// Each unicode escape group occupies 6 bytes (backslash, 'u', four hex
// digits), so the three constants evaluate to 0, 6 and 12 respectively.
25 private static final int ESCAPE_UNI1_OFFSET = 0;
26 private static final int ESCAPE_UNI2_OFFSET = ESCAPE_UNI1_OFFSET + 6;
27 private static final int ESCAPE_CHAR_OFFSET = ESCAPE_UNI2_OFFSET + 6;
28 /** Array used for code point decomposition in surrogates */
// Filled by Character.toChars in escapeUtf8Char; two slots suffice because a
// code point maps to at most two UTF-16 code units.
29 private final char[] utf16 = new char[2];
// Lookup table from a 4-bit value (0..15) to its lowercase ASCII hex digit;
// indexed by escapeCodeUnit with a nibble masked by 0xf.
31 private static final byte[] HEX =
32 new byte[] {'0', '1', '2', '3', '4', '5', '6', '7',
33 '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
/**
 * Creates an encoder and records the {@code asciiOnly} flag, which
 * handleChar consults later.
 *
 * NOTE(review): {@code context} is not used on the lines visible here;
 * presumably it is handed to the ByteListTranscoder superclass, since
 * invalidUtf8 reads a {@code context} field -- confirm.
 *
 * @param context   thread context for this encoder
 * @param asciiOnly flag stored in the field of the same name
 */
35 StringEncoder(ThreadContext context, boolean asciiOnly) {
37 this.asciiOnly = asciiOnly;
/**
 * Encodes {@code src} into {@code out}.
 *
 * The step visible here takes the value returned by readUtf8Char() and
 * passes it to handleChar(int).
 * NOTE(review): presumably this call sits inside a loop over the whole
 * source, with setup and terminating output around it -- confirm against
 * the complete method body.
 *
 * @param src the bytes to read from
 * @param out the list that receives the encoded representation
 */
40 void encode(ByteList src, ByteList out) {
44 handleChar(readUtf8Char());
/**
 * Decides how a single decoded code point {@code c} is handled.
 *
 * The condition visible here is true for the range 0x20..0x7f inclusive
 * and, when asciiOnly is false, for every code point from 0x80 upwards.
 * NOTE(review): presumably code points passing this test are emitted
 * unescaped and the rest go through escapeUtf8Char -- confirm against the
 * branch bodies.
 */
50 private void handleChar(int c) {
// && binds tighter than ||, so the first range test stands on its own and
// the asciiOnly check only guards the c >= 0x80 case.
72 if (c >= 0x20 && c <= 0x7f ||
73 (c >= 0x80 && !asciiOnly)) {
/**
 * Appends a two-byte escape sequence whose second byte is {@code c}.
 *
 * Stores {@code c}, narrowed to a byte, in the slot right after
 * ESCAPE_CHAR_OFFSET and then appends the two bytes of aux that start at
 * ESCAPE_CHAR_OFFSET.
 * NOTE(review): aux[ESCAPE_CHAR_OFFSET] itself is not written on these
 * lines; presumably it is pre-filled with a backslash -- confirm.
 */
82 private void escapeChar(char c) {
84 aux[ESCAPE_CHAR_OFFSET + 1] = (byte)c;
85 append(aux, ESCAPE_CHAR_OFFSET, 2);
/**
 * Appends {@code codePoint} as one or two six-byte unicode escapes.
 *
 * Character.toChars splits the code point into UTF-16 code units (two when
 * it needs a surrogate pair). The hex digits of each unit are written into
 * its group inside aux, and the first {@code 6 * numChars} bytes of aux are
 * then appended in a single call.
 */
88 private void escapeUtf8Char(int codePoint) {
89 int numChars = Character.toChars(codePoint, utf16, 0);
// "+ 2" skips the backslash and 'u' that already sit at the start of each group.
90 escapeCodeUnit(utf16[0], ESCAPE_UNI1_OFFSET + 2);
91 if (numChars > 1) escapeCodeUnit(utf16[1], ESCAPE_UNI2_OFFSET + 2);
// The two groups are adjacent in aux, so one append covers both when numChars is 2.
92 append(aux, ESCAPE_UNI1_OFFSET, 6 * numChars);
/**
 * Writes the four lowercase hex digits of {@code c}, most significant
 * nibble first, into aux[auxOffset] .. aux[auxOffset + 3].
 */
95 private void escapeCodeUnit(char c, int auxOffset) {
96 for (int i = 0; i < 4; i++) {
// Shift amounts are 12, 8, 4, 0 for i = 0..3; the 0xf mask keeps one nibble.
97 aux[auxOffset + i] = HEX[(c >>> (12 - 4 * i)) & 0xf];
/**
 * Builds the exception that reports malformed UTF-8 in the source.
 *
 * The exception is created through Utils.newException with
 * Utils.M_GENERATOR_ERROR and is returned, not thrown, by this method.
 * NOTE(review): presumably this implements a hook declared by
 * ByteListTranscoder -- confirm.
 *
 * @return the newly created RaiseException
 */
102 protected RaiseException invalidUtf8() {
103 return Utils.newException(context, Utils.M_GENERATOR_ERROR,
104 "source sequence is illegal/malformed utf-8");