001/**
002 *
003 * Copyright © 2015-2024 Florian Schmaus
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.jxmpp.xml.splitter;
018
019import java.io.IOException;
020import java.io.OutputStream;
021import java.nio.ByteBuffer;
022import java.util.Arrays;
023import java.util.Collection;
024
025/**
026 * Extended version of {@link XmppXmlSplitter} allowing input to be bytes or
027 * {@link ByteBuffer} representing a UTF-8 encoded XML string for XMPP. Just as
028 * they come from a network socket.
029 * <p>
 * This class respects the byte order mark (BOM) requirement of RFC 6120 11.6
031 * and treats the BOM as zero width no-break space, and not as byte order mark.
032 * </p>
033 * 
034 * @author Florian Schmaus
035 *
036 */
037public class Utf8ByteXmppXmlSplitter extends OutputStream {
038
039        private final XmppXmlSplitter xmppXmlSplitter;
040
041        /**
042         * Create a new splitter with the given callback.
043         *
044         * @param xmppElementCallback the callback invoked once a complete element has been processed.
045         */
046        public Utf8ByteXmppXmlSplitter(XmppElementCallback xmppElementCallback) {
047                this(new XmppXmlSplitter(xmppElementCallback));
048        }
049
050        /**
051         * Create a new UTF-8 splitter with the given XMPP XML splitter.
052         *
053         * @param xmppXmlSplitter the used XMPP XML splitter.
054         */
055        public Utf8ByteXmppXmlSplitter(XmppXmlSplitter xmppXmlSplitter) {
056                this.xmppXmlSplitter = xmppXmlSplitter;
057        }
058
059        private final byte[] buffer = new byte[6];
060
061        private char[] writeBuffer = new char[1024];
062        private int writeBufferPos;
063        private byte count;
064        private byte expectedLength;
065
066        @Override
067        public void write(int b) throws IOException {
068                write((byte) (b & 0xff));
069        }
070
071        /**
072         * Write a single byte. The byte must be part of a UTF-8 String.
073         *
074         * @param b the byte to write.
075         * @throws IOException if an error occurs.
076         */
077        public void write(byte b) throws IOException {
078                process(b);
079                afterInputProcessed();
080        }
081
082        /**
083         * Write the given array of byte buffers.
084         *
085         * @param byteBuffers the array of byte buffers.
086         * @throws IOException if an error occurs.
087         */
088        public void write(ByteBuffer[] byteBuffers) throws IOException {
089                write(Arrays.asList(byteBuffers));
090        }
091
092        /**
093         * Write the given collection of byte buffers.
094         *
095         * @param byteBuffers the collection of byte buffers.
096         * @throws IOException if an error occurs.
097         */
098        public void write(Collection<? extends ByteBuffer> byteBuffers) throws IOException {
099                int requiredNewCapacity = 0;
100                for (ByteBuffer byteBuffer : byteBuffers) {
101                        requiredNewCapacity += byteBuffer.remaining();
102                }
103
104                ensureWriteBufferHasCapacityFor(requiredNewCapacity);
105
106                for (ByteBuffer byteBuffer : byteBuffers) {
107                        writeByteBufferInternal(byteBuffer);
108                }
109
110                afterInputProcessed();
111        }
112
113        /**
114         * Write the given byte buffer.
115         *
116         * @param byteBuffer the byte buffer.
117         * @throws IOException if an error occurs.
118         */
119        public void write(ByteBuffer byteBuffer) throws IOException {
120                final int remaining = byteBuffer.remaining();
121                ensureWriteBufferHasCapacityFor(remaining);
122
123                writeByteBufferInternal(byteBuffer);
124
125                afterInputProcessed();
126        }
127
128        private void writeByteBufferInternal(ByteBuffer byteBuffer) throws IOException {
129                final int remaining = byteBuffer.remaining();
130
131                if (byteBuffer.hasArray()) {
132                        writeInternal(byteBuffer.array(), byteBuffer.arrayOffset(), remaining);
133                } else {
134                        int initialPosition = byteBuffer.position();
135                        for (int i = 0; i < remaining; i++) {
136                                process(byteBuffer.get(initialPosition + i));
137                        }
138                }
139
140                ((java.nio.Buffer) byteBuffer).flip();
141        }
142
143        @Override
144        public void write(byte[] b, int offset, int length) throws IOException {
145                ensureWriteBufferHasCapacityFor(length);
146
147                writeInternal(b, offset, length);
148
149                afterInputProcessed();
150        }
151
152        private void writeInternal(byte[] b, int offset, int length) throws IOException {
153                for (int i = 0; i < length; i++ ) {
154                        process(b[offset + i]);
155                }
156        }
157
158        /**
159         * Reset the write buffer to the given size.
160         *
161         * @param size the new write buffer size.
162         */
163        public void resetWriteBuffer(int size) {
164                writeBuffer = new char[size];
165                writeBufferPos = 0;
166        }
167
168        private void process(byte b) throws IOException {
169                buffer[count] = b;
170
171                if (count == 0) {
172                        int firstByte = buffer[0] & 0xff;
173                        if (firstByte < 0x80) {
174                                expectedLength = 1;
175                        } else if (firstByte < 0xe0) {
176                                expectedLength = 2;
177                        } else if (firstByte < 0xf0) {
178                                expectedLength = 3;
179                        } else if (firstByte < 0xf8) {
180                                expectedLength = 4;
181                        } else {
182                                throw new IOException("Invalid first UTF-8 byte: " + firstByte);
183                        }
184                }
185
186                if (++count == expectedLength) {
187                        int codepoint;
188                        if (expectedLength == 1) {
189                                codepoint = buffer[0] & 0x7f;
190                        } else {
191                                // The following switch-case could also be omitted. Note sure
192                                // how it would affect performance. Using switch-case means that
193                                // the bitsToMask does not need to be calculated, but the code
194                                // would be shorter if the switch-code was not here and maybe
195                                // this affects JIT'ed performance (maybe even positive).
196                                switch (expectedLength) {
197                                case 2:
198                                        codepoint = buffer[0] & 0x1f;
199                                        codepoint <<= 6 * 1;
200                                        break;
201                                case 3:
202                                        codepoint = buffer[0] & 0xf;
203                                        codepoint <<= 6 * 2;
204                                        break;
205                                case 4:
206                                        codepoint = buffer[0] & 0x6;
207                                        codepoint <<= 6 * 3;
208                                        break;
209                                default:
210                                        throw new IllegalStateException();
211                                }
212
213                                for (int i = 1; i < expectedLength; i++) {
214                                        // Get the lower 6 bits.
215                                        int bits = buffer[i] & 0x3f;
216                                        // Shift the bits to the right position.
217                                        bits <<= 6 * (expectedLength - 1 - i);
218                                        codepoint |= bits;
219                                }
220                        }
221
222                        ensureWriteBufferHasCapacityFor(2);
223
224                        if (codepoint < 0x10000) {
225                                appendToWriteBuffer((char) codepoint);
226                        } else {
227                                // We have to convert the codepoint into a surrogate pair.
228                                // high surrogate: top ten bits added to 0xd800 give the first 16-bit code unit.
229                                appendToWriteBuffer((char) (0xd800 + (codepoint & 0xffa00000)));
230                                // low surrogate: low ten bits added to 0xdc00 give the second 16-bit code unit.
231                                appendToWriteBuffer((char) (0xdc00 + (codepoint & 0x3ff)));
232                        }
233
234                        // Reset count since we are done handling this UTF-8 codepoint.
235                        count = 0;
236                }
237        }
238
239        private void afterInputProcessed() throws IOException {
240                xmppXmlSplitter.write(writeBuffer, 0, writeBufferPos);
241                writeBufferPos = 0;
242        }
243
244        private void appendToWriteBuffer(char c) {
245                writeBuffer[writeBufferPos++] = c;
246        }
247
248        private void ensureWriteBufferHasCapacityFor(int additionalCapacity) {
249                final int requiredCapacity = writeBufferPos + additionalCapacity;
250                if (requiredCapacity <= writeBuffer.length) {
251                        return;
252                }
253
254                // Simple resize logic of write buffer.
255                char[] newWriteBuffer = new char[requiredCapacity];
256                System.arraycopy(writeBuffer, 0, newWriteBuffer, 0, writeBufferPos);
257                writeBuffer = newWriteBuffer;
258        }
259}