001/**
002 *
003 * Copyright © 2015-2017 Florian Schmaus
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.jxmpp.xml.splitter;
018
019import java.io.IOException;
020import java.io.Writer;
021import java.util.HashMap;
022import java.util.Map;
023
024/**
025 * A XML splitter capable of splitting XML into top-level elements.
026 * <p>
027 * Note that this class does not yet support the feature set of XML. Only the
028 * required features for XMPP are supported. XML comments and processing
029 * instructions are not supported.
030 * </p>
031 *
032 */
033public class XmlSplitter extends Writer {
034
035        private enum State {
036                START,
037                AFTER_TAG_RIGHT_ANGLE_BRACKET,
038                IN_TAG_NAME,
039                IN_END_TAG,
040                AFTER_START_NAME,
041                IN_EMPTY_TAG,
042                IN_ATTRIBUTE_NAME,
043                AFTER_ATTRIBUTE_EQUALS,
044                IN_ATTRIBUTE_VALUE,
045                AFTER_COMMENT_BANG,
046                AFTER_COMMENT_DASH1,
047                AFTER_COMMENT_DASH2,
048                AFTER_COMMENT,
049                AFTER_COMMENT_CLOSING_DASH1,
050                AFTER_COMMENT_CLOSING_DASH2,
051                IN_PROCESSING_INSTRUCTION_OR_DECLARATION,
052                IN_PROCESSING_INSTRUCTION_OR_DECLARATION_PSEUDO_ATTRIBUTE_VALUE,
053                IN_PROCESSING_INSTRUCTION_OR_DECLARATION_QUESTION_MARK,
054        }
055
056        private final DeclarationCallback declarationCallback;
057        private final ProcessingInstructionCallback processingInstructionCallback;
058
059        protected final CompleteElementCallback completeElementCallback;
060
061        private final StringBuilder splittedPartBuffer;
062
063        private final StringBuilder tokenBuffer = new StringBuilder(256);
064        private final Map<String, String> attributes = new HashMap<>();
065
066        private int depth;
067        private String qName;
068        private String attributeName;
069        private State state = State.START;
070
071        private enum AttributeValueQuotes {
072                apos('\''),
073                quot('"'),
074                ;
075
076                final char c;
077
078                AttributeValueQuotes(char c) {
079                        this.c = c;
080                }
081        }
082
083        /**
084         * The type of quotation used for the current (or last) attribute. Note that depending on which quotation is used,
085         * the other quotation does not need to be escaped within the value. Therefore we need to remember it to reliable
086         * detect the end quotation of the value.
087         */
088        private AttributeValueQuotes attributeValueQuotes;
089
090        /**
091         * Construct a new XML splitter.
092         *
093         * @param bufferSize the initial size of the buffer.
094         * @param completeElementCallback the callback invoked once a complete element has been processed.
095         * @param declarationCallback a optional callback for the XML declaration.
096         * @param processingInstructionCallback a optional callback for Processing Instructions.
097         */
098        public XmlSplitter(int bufferSize, CompleteElementCallback completeElementCallback, DeclarationCallback declarationCallback, ProcessingInstructionCallback processingInstructionCallback) {
099                this.splittedPartBuffer = new StringBuilder(bufferSize);
100                if (completeElementCallback == null) {
101                        throw new IllegalArgumentException();
102                }
103                this.completeElementCallback = completeElementCallback;
104                this.declarationCallback = declarationCallback;
105                this.processingInstructionCallback = processingInstructionCallback;
106        }
107
108        /**
109         * Construct a new XML splitter.
110         *
111         * @param bufferSize the initial size of the buffer.
112         * @param completeElementCallback the callback invoked once a complete element has been processed.
113         */
114        public XmlSplitter(int bufferSize, CompleteElementCallback completeElementCallback) {
115                this(bufferSize, completeElementCallback, null, null);
116        }
117
118        @Override
119        public void write(char[] cbuf, int off, int len) throws IOException {
120                for (int cur = off; cur < off+len; cur++) {
121                        processChar(cbuf[off+cur]);
122                }
123        }
124
125        @Override
126        public void flush() throws IOException {
127        }
128
129        @Override
130        public void close() throws IOException {
131        }
132
133        /**
134         * Get the size in bytes of the splitted part currently being processed.
135         * 
136         * @return the size of the current splitted part in chars.
137         */
138        public final int getCurrentSplittedPartSize() {
139                return splittedPartBuffer.length();
140        }
141
142        protected void onNextChar() throws IOException {
143        }
144
145        protected void onStartTag(String prefix, String localpart, Map<String, String> attributes) {
146        }
147
148        protected void onEndTag(String qName) {
149        }
150
151        protected final void newSplittedPart() {
152                depth = 0;
153                splittedPartBuffer.setLength(0);
154
155                assert state != State.START;
156                state = State.START;
157        }
158
159        private void processChar(char c) throws IOException {
160                onNextChar();
161
162                // Append every char we see to the buffer. This helps for example XmppXmlSplitter to ensure a certain size is
163                // not exceeded. In case of XMPP, the size is usually for the top level stream element (Stanzas and Nonzas), but
164                // also other XML pseudo-elements like the Declaration or Processing Instructions's size is limited by this.
165                splittedPartBuffer.append(c);
166
167                switch (state) {
168                case START:
169                        switch (c) {
170                        case '<':
171                                state = State.AFTER_TAG_RIGHT_ANGLE_BRACKET;
172                                break;
173                        }
174                        break;
175                case AFTER_TAG_RIGHT_ANGLE_BRACKET:
176                        switch (c) {
177                        case '/':
178                                state = State.IN_END_TAG;
179                                break;
180                        case '?':
181                                state = State.IN_PROCESSING_INSTRUCTION_OR_DECLARATION;
182                                break;
183                        case '!':
184                                state = State.AFTER_COMMENT_BANG;
185                                break;
186                        default:
187                                tokenBuffer.append(c);
188                                state = State.IN_TAG_NAME;
189                                break;
190                        }
191                        break;
192                case IN_TAG_NAME:
193                        switch (c) {
194                        // XML 1.1 § 2.3 "White Space"
195                        case ' ':
196                        case '\n':
197                        case '\r':
198                        case '\t':
199                                qName = getToken();
200                                state = State.AFTER_START_NAME;
201                                break;
202                        case '/':
203                                qName = getToken();
204                                onStartTagFinished();
205                                state = State.IN_EMPTY_TAG;
206                                break;
207                        case '>':
208                                qName = getToken();
209                                onStartTagFinished();
210                                state = State.START;
211                                break;
212                        default:
213                                tokenBuffer.append(c);
214                                break;
215                        }
216                        break;
217                case IN_END_TAG:
218                        switch (c) {
219                        case '>':
220                                onEndTagFinished();
221                                break;
222                        default:
223                                tokenBuffer.append(c);
224                                break;
225                        }
226                        break;
227                case AFTER_START_NAME:
228                        switch (c) {
229                        case '/':
230                                onStartTagFinished();
231                                state = State.IN_EMPTY_TAG;
232                                break;
233                        case '>':
234                                onStartTagFinished();
235                                state = State.START;
236                                break;
237                        // XML 1.1 § 2.3 "White Space"
238                        case ' ':
239                        case '\n':
240                        case '\r':
241                        case '\t':
242                                break;
243                        // Attribute Name
244                        default:
245                                tokenBuffer.append(c);
246                                state = State.IN_ATTRIBUTE_NAME;
247                                break;
248                        }
249                        break;
250                case IN_ATTRIBUTE_NAME:
251                        switch (c) {
252                        case '=':
253                                attributeName = getToken();
254                                state = State.AFTER_ATTRIBUTE_EQUALS;
255                                break;
256                        default:
257                                tokenBuffer.append(c);
258                        }
259                        break;
260                case AFTER_ATTRIBUTE_EQUALS:
261                        switch (c) {
262                        case '\'':
263                                attributeValueQuotes = AttributeValueQuotes.apos;
264                                state = State.IN_ATTRIBUTE_VALUE;
265                                break;
266                        case '\"':
267                                attributeValueQuotes = AttributeValueQuotes.quot;
268                                state = State.IN_ATTRIBUTE_VALUE;
269                                break;
270                        default:
271                                throw new IOException();
272                        }
273                        break;
274                case IN_ATTRIBUTE_VALUE:
275                        if (c == attributeValueQuotes.c) {
276                                attributes.put(attributeName, getToken());
277                                state = State.AFTER_START_NAME;
278                        } else {
279                                tokenBuffer.append(c);
280                        }
281                        break;
282                case IN_EMPTY_TAG:
283                        switch (c) {
284                        case '>':
285                                onEndTagFinished();
286                                break;
287                        default:
288                                throw new IOException();
289                        }
290                        break;
291                case IN_PROCESSING_INSTRUCTION_OR_DECLARATION:
292                        switch (c) {
293                                case '\'':
294                                        attributeValueQuotes = AttributeValueQuotes.apos;
295                                        state = State.IN_PROCESSING_INSTRUCTION_OR_DECLARATION_PSEUDO_ATTRIBUTE_VALUE;
296                                        break;
297                                case '\"':
298                                        attributeValueQuotes = AttributeValueQuotes.quot;
299                                        state = State.IN_PROCESSING_INSTRUCTION_OR_DECLARATION_PSEUDO_ATTRIBUTE_VALUE;
300                                        break;
301                                case '?':
302                                        state = State.IN_PROCESSING_INSTRUCTION_OR_DECLARATION_QUESTION_MARK;
303                                        break;
304                        }
305                        break;
306                case IN_PROCESSING_INSTRUCTION_OR_DECLARATION_PSEUDO_ATTRIBUTE_VALUE:
307                        if (c == attributeValueQuotes.c) {
308                                state = State.IN_PROCESSING_INSTRUCTION_OR_DECLARATION;
309                        }
310                        break;
311                case IN_PROCESSING_INSTRUCTION_OR_DECLARATION_QUESTION_MARK:
312                        if (c == '>') {
313                                String processingInstructionOrDeclaration = splittedPartBuffer.toString();
314                                onProcessingInstructionOrDeclaration(processingInstructionOrDeclaration);
315                                newSplittedPart();
316                        } else {
317                                state = State.IN_PROCESSING_INSTRUCTION_OR_DECLARATION;
318                        }
319                        break;
320                case AFTER_COMMENT_BANG:
321                case AFTER_COMMENT_DASH1:
322                case AFTER_COMMENT_DASH2:
323                case AFTER_COMMENT:
324                case AFTER_COMMENT_CLOSING_DASH1:
325                case AFTER_COMMENT_CLOSING_DASH2:
326                        throw new UnsupportedOperationException();
327                }
328        }
329
330        private void onStartTagFinished() {
331                // qName should already be set correctly.
332                depth++;
333                String prefix = extractPrefix(qName);
334                String localpart = extractLocalpart(qName);
335                onStartTag(prefix, localpart, attributes);
336                attributes.clear();
337        }
338
339        private void onEndTagFinished() {
340                String endTagName = getToken();
341                if (endTagName.length() == 0) {
342                        // empty element case
343                        endTagName = qName;
344                }
345                depth--;
346                if (depth == 0) {
347                        String completeElement = splittedPartBuffer.toString();
348                        splittedPartBuffer.setLength(0);
349                        completeElementCallback.onCompleteElement(completeElement);
350                }
351                onEndTag(endTagName);
352
353                assert state != State.START;
354                state = State.START;
355        }
356
357        private String getToken() {
358                String token = tokenBuffer.toString();
359                tokenBuffer.setLength(0);
360                return token;
361        }
362
363        private void onProcessingInstructionOrDeclaration(String processingInstructionOrDeclaration) {
364                if (processingInstructionOrDeclaration.startsWith("<?xml ")) {
365                        if (declarationCallback != null) {
366                                declarationCallback.onDeclaration(processingInstructionOrDeclaration);
367                        }
368                } else {
369                        if (processingInstructionCallback != null) {
370                                processingInstructionCallback.onProcessingInstruction(processingInstructionOrDeclaration);
371                        }
372                }
373        }
374
375        private static String extractPrefix(String qName) {
376                int index = qName.indexOf(':');
377                return index > -1  ? qName.substring(0, index) : qName;
378        }
379
380        private static String extractLocalpart(String qName) {
381                int index = qName.indexOf(':');
382                return index > -1 ? qName.substring(index + 1) : qName;
383        }
384}