001/** 002 * 003 * Copyright © 2015-2024 Florian Schmaus 004 * 005 * Licensed under the Apache License, Version 2.0 (the "License"); 006 * you may not use this file except in compliance with the License. 007 * You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.jxmpp.xml.splitter; 018 019import java.io.IOException; 020import java.io.OutputStream; 021import java.nio.ByteBuffer; 022import java.util.Arrays; 023import java.util.Collection; 024 025/** 026 * Extended version of {@link XmppXmlSplitter} allowing input to be bytes or 027 * {@link ByteBuffer} representing a UTF-8 encoded XML string for XMPP. Just as 028 * they come from a network socket. 029 * <p> 030 * This class respects the byte order mark (BOM )requirement of RFC 6120 11.6 031 * and treats the BOM as zero width no-break space, and not as byte order mark. 032 * </p> 033 * 034 * @author Florian Schmaus 035 * 036 */ 037public class Utf8ByteXmppXmlSplitter extends OutputStream { 038 039 private final XmppXmlSplitter xmppXmlSplitter; 040 041 /** 042 * Create a new splitter with the given callback. 043 * 044 * @param xmppElementCallback the callback invoked once a complete element has been processed. 045 */ 046 public Utf8ByteXmppXmlSplitter(XmppElementCallback xmppElementCallback) { 047 this(new XmppXmlSplitter(xmppElementCallback)); 048 } 049 050 /** 051 * Create a new UTF-8 splitter with the given XMPP XML splitter. 052 * 053 * @param xmppXmlSplitter the used XMPP XML splitter. 054 */ 055 public Utf8ByteXmppXmlSplitter(XmppXmlSplitter xmppXmlSplitter) { 056 this.xmppXmlSplitter = xmppXmlSplitter; 057 } 058 059 private final byte[] buffer = new byte[6]; 060 061 private char[] writeBuffer = new char[1024]; 062 private int writeBufferPos; 063 private byte count; 064 private byte expectedLength; 065 066 @Override 067 public void write(int b) throws IOException { 068 write((byte) (b & 0xff)); 069 } 070 071 /** 072 * Write a single byte. The byte must be part of a UTF-8 String. 073 * 074 * @param b the byte to write. 075 * @throws IOException if an error occurs. 076 */ 077 public void write(byte b) throws IOException { 078 process(b); 079 afterInputProcessed(); 080 } 081 082 /** 083 * Write the given array of byte buffers. 084 * 085 * @param byteBuffers the array of byte buffers. 086 * @throws IOException if an error occurs. 087 */ 088 public void write(ByteBuffer[] byteBuffers) throws IOException { 089 write(Arrays.asList(byteBuffers)); 090 } 091 092 /** 093 * Write the given collection of byte buffers. 094 * 095 * @param byteBuffers the collection of byte buffers. 096 * @throws IOException if an error occurs. 097 */ 098 public void write(Collection<? extends ByteBuffer> byteBuffers) throws IOException { 099 int requiredNewCapacity = 0; 100 for (ByteBuffer byteBuffer : byteBuffers) { 101 requiredNewCapacity += byteBuffer.remaining(); 102 } 103 104 ensureWriteBufferHasCapacityFor(requiredNewCapacity); 105 106 for (ByteBuffer byteBuffer : byteBuffers) { 107 writeByteBufferInternal(byteBuffer); 108 } 109 110 afterInputProcessed(); 111 } 112 113 /** 114 * Write the given byte buffer. 115 * 116 * @param byteBuffer the byte buffer. 117 * @throws IOException if an error occurs. 118 */ 119 public void write(ByteBuffer byteBuffer) throws IOException { 120 final int remaining = byteBuffer.remaining(); 121 ensureWriteBufferHasCapacityFor(remaining); 122 123 writeByteBufferInternal(byteBuffer); 124 125 afterInputProcessed(); 126 } 127 128 private void writeByteBufferInternal(ByteBuffer byteBuffer) throws IOException { 129 final int remaining = byteBuffer.remaining(); 130 131 if (byteBuffer.hasArray()) { 132 writeInternal(byteBuffer.array(), byteBuffer.arrayOffset(), remaining); 133 } else { 134 int initialPosition = byteBuffer.position(); 135 for (int i = 0; i < remaining; i++) { 136 process(byteBuffer.get(initialPosition + i)); 137 } 138 } 139 140 ((java.nio.Buffer) byteBuffer).flip(); 141 } 142 143 @Override 144 public void write(byte[] b, int offset, int length) throws IOException { 145 ensureWriteBufferHasCapacityFor(length); 146 147 writeInternal(b, offset, length); 148 149 afterInputProcessed(); 150 } 151 152 private void writeInternal(byte[] b, int offset, int length) throws IOException { 153 for (int i = 0; i < length; i++ ) { 154 process(b[offset + i]); 155 } 156 } 157 158 /** 159 * Reset the write buffer to the given size. 160 * 161 * @param size the new write buffer size. 162 */ 163 public void resetWriteBuffer(int size) { 164 writeBuffer = new char[size]; 165 writeBufferPos = 0; 166 } 167 168 private void process(byte b) throws IOException { 169 buffer[count] = b; 170 171 if (count == 0) { 172 int firstByte = buffer[0] & 0xff; 173 if (firstByte < 0x80) { 174 expectedLength = 1; 175 } else if (firstByte < 0xe0) { 176 expectedLength = 2; 177 } else if (firstByte < 0xf0) { 178 expectedLength = 3; 179 } else if (firstByte < 0xf8) { 180 expectedLength = 4; 181 } else { 182 throw new IOException("Invalid first UTF-8 byte: " + firstByte); 183 } 184 } 185 186 if (++count == expectedLength) { 187 int codepoint; 188 if (expectedLength == 1) { 189 codepoint = buffer[0] & 0x7f; 190 } else { 191 // The following switch-case could also be omitted. Note sure 192 // how it would affect performance. Using switch-case means that 193 // the bitsToMask does not need to be calculated, but the code 194 // would be shorter if the switch-code was not here and maybe 195 // this affects JIT'ed performance (maybe even positive). 196 switch (expectedLength) { 197 case 2: 198 codepoint = buffer[0] & 0x1f; 199 codepoint <<= 6 * 1; 200 break; 201 case 3: 202 codepoint = buffer[0] & 0xf; 203 codepoint <<= 6 * 2; 204 break; 205 case 4: 206 codepoint = buffer[0] & 0x6; 207 codepoint <<= 6 * 3; 208 break; 209 default: 210 throw new IllegalStateException(); 211 } 212 213 for (int i = 1; i < expectedLength; i++) { 214 // Get the lower 6 bits. 215 int bits = buffer[i] & 0x3f; 216 // Shift the bits to the right position. 217 bits <<= 6 * (expectedLength - 1 - i); 218 codepoint |= bits; 219 } 220 } 221 222 ensureWriteBufferHasCapacityFor(2); 223 224 if (codepoint < 0x10000) { 225 appendToWriteBuffer((char) codepoint); 226 } else { 227 // We have to convert the codepoint into a surrogate pair. 228 // high surrogate: top ten bits added to 0xd800 give the first 16-bit code unit. 229 appendToWriteBuffer((char) (0xd800 + (codepoint & 0xffa00000))); 230 // low surrogate: low ten bits added to 0xdc00 give the second 16-bit code unit. 231 appendToWriteBuffer((char) (0xdc00 + (codepoint & 0x3ff))); 232 } 233 234 // Reset count since we are done handling this UTF-8 codepoint. 235 count = 0; 236 } 237 } 238 239 private void afterInputProcessed() throws IOException { 240 xmppXmlSplitter.write(writeBuffer, 0, writeBufferPos); 241 writeBufferPos = 0; 242 } 243 244 private void appendToWriteBuffer(char c) { 245 writeBuffer[writeBufferPos++] = c; 246 } 247 248 private void ensureWriteBufferHasCapacityFor(int additionalCapacity) { 249 final int requiredCapacity = writeBufferPos + additionalCapacity; 250 if (requiredCapacity <= writeBuffer.length) { 251 return; 252 } 253 254 // Simple resize logic of write buffer. 255 char[] newWriteBuffer = new char[requiredCapacity]; 256 System.arraycopy(writeBuffer, 0, newWriteBuffer, 0, writeBufferPos); 257 writeBuffer = newWriteBuffer; 258 } 259}