001/** 002 * 003 * Copyright © 2015-2017 Florian Schmaus 004 * 005 * Licensed under the Apache License, Version 2.0 (the "License"); 006 * you may not use this file except in compliance with the License. 007 * You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.jxmpp.xml.splitter; 018 019import java.io.IOException; 020import java.io.Writer; 021import java.util.HashMap; 022import java.util.Map; 023 024/** 025 * A XML splitter capable of splitting XML into top-level elements. 026 * <p> 027 * Note that this class does not yet support the feature set of XML. Only the 028 * required features for XMPP are supported. XML comments and processing 029 * instructions are not supported. 030 * </p> 031 * 032 */ 033public class XmlSplitter extends Writer { 034 035 private enum State { 036 START, 037 AFTER_TAG_RIGHT_ANGLE_BRACKET, 038 IN_TAG_NAME, 039 IN_END_TAG, 040 AFTER_START_NAME, 041 IN_EMPTY_TAG, 042 IN_ATTRIBUTE_NAME, 043 AFTER_ATTRIBUTE_EQUALS, 044 IN_ATTRIBUTE_VALUE, 045 AFTER_COMMENT_BANG, 046 AFTER_COMMENT_DASH1, 047 AFTER_COMMENT_DASH2, 048 AFTER_COMMENT, 049 AFTER_COMMENT_CLOSING_DASH1, 050 AFTER_COMMENT_CLOSING_DASH2, 051 IN_PROCESSING_INSTRUCTION_OR_DECLARATION, 052 IN_PROCESSING_INSTRUCTION_OR_DECLARATION_PSEUDO_ATTRIBUTE_VALUE, 053 IN_PROCESSING_INSTRUCTION_OR_DECLARATION_QUESTION_MARK, 054 } 055 056 private final DeclarationCallback declarationCallback; 057 private final ProcessingInstructionCallback processingInstructionCallback; 058 059 protected final CompleteElementCallback completeElementCallback; 060 061 private final StringBuilder splittedPartBuffer; 062 063 private final StringBuilder tokenBuffer = new StringBuilder(256); 064 private final Map<String, String> attributes = new HashMap<>(); 065 066 private int depth; 067 private String qName; 068 private String attributeName; 069 private State state = State.START; 070 071 private enum AttributeValueQuotes { 072 apos('\''), 073 quot('"'), 074 ; 075 076 final char c; 077 078 AttributeValueQuotes(char c) { 079 this.c = c; 080 } 081 } 082 083 /** 084 * The type of quotation used for the current (or last) attribute. Note that depending on which quotation is used, 085 * the other quotation does not need to be escaped within the value. Therefore we need to remember it to reliable 086 * detect the end quotation of the value. 087 */ 088 private AttributeValueQuotes attributeValueQuotes; 089 090 /** 091 * Construct a new XML splitter. 092 * 093 * @param bufferSize the initial size of the buffer. 094 * @param completeElementCallback the callback invoked once a complete element has been processed. 095 * @param declarationCallback a optional callback for the XML declaration. 096 * @param processingInstructionCallback a optional callback for Processing Instructions. 097 */ 098 public XmlSplitter(int bufferSize, CompleteElementCallback completeElementCallback, DeclarationCallback declarationCallback, ProcessingInstructionCallback processingInstructionCallback) { 099 this.splittedPartBuffer = new StringBuilder(bufferSize); 100 if (completeElementCallback == null) { 101 throw new IllegalArgumentException(); 102 } 103 this.completeElementCallback = completeElementCallback; 104 this.declarationCallback = declarationCallback; 105 this.processingInstructionCallback = processingInstructionCallback; 106 } 107 108 /** 109 * Construct a new XML splitter. 110 * 111 * @param bufferSize the initial size of the buffer. 112 * @param completeElementCallback the callback invoked once a complete element has been processed. 113 */ 114 public XmlSplitter(int bufferSize, CompleteElementCallback completeElementCallback) { 115 this(bufferSize, completeElementCallback, null, null); 116 } 117 118 @Override 119 public void write(char[] cbuf, int off, int len) throws IOException { 120 for (int cur = off; cur < off+len; cur++) { 121 processChar(cbuf[off+cur]); 122 } 123 } 124 125 @Override 126 public void flush() throws IOException { 127 } 128 129 @Override 130 public void close() throws IOException { 131 } 132 133 /** 134 * Get the size in bytes of the splitted part currently being processed. 135 * 136 * @return the size of the current splitted part in chars. 137 */ 138 public final int getCurrentSplittedPartSize() { 139 return splittedPartBuffer.length(); 140 } 141 142 protected void onNextChar() throws IOException { 143 } 144 145 protected void onStartTag(String prefix, String localpart, Map<String, String> attributes) { 146 } 147 148 protected void onEndTag(String qName) { 149 } 150 151 protected final void newSplittedPart() { 152 depth = 0; 153 splittedPartBuffer.setLength(0); 154 155 assert state != State.START; 156 state = State.START; 157 } 158 159 private void processChar(char c) throws IOException { 160 onNextChar(); 161 162 // Append every char we see to the buffer. This helps for example XmppXmlSplitter to ensure a certain size is 163 // not exceeded. In case of XMPP, the size is usually for the top level stream element (Stanzas and Nonzas), but 164 // also other XML pseudo-elements like the Declaration or Processing Instructions's size is limited by this. 165 splittedPartBuffer.append(c); 166 167 switch (state) { 168 case START: 169 switch (c) { 170 case '<': 171 state = State.AFTER_TAG_RIGHT_ANGLE_BRACKET; 172 break; 173 } 174 break; 175 case AFTER_TAG_RIGHT_ANGLE_BRACKET: 176 switch (c) { 177 case '/': 178 state = State.IN_END_TAG; 179 break; 180 case '?': 181 state = State.IN_PROCESSING_INSTRUCTION_OR_DECLARATION; 182 break; 183 case '!': 184 state = State.AFTER_COMMENT_BANG; 185 break; 186 default: 187 tokenBuffer.append(c); 188 state = State.IN_TAG_NAME; 189 break; 190 } 191 break; 192 case IN_TAG_NAME: 193 switch (c) { 194 // XML 1.1 § 2.3 "White Space" 195 case ' ': 196 case '\n': 197 case '\r': 198 case '\t': 199 qName = getToken(); 200 state = State.AFTER_START_NAME; 201 break; 202 case '/': 203 qName = getToken(); 204 onStartTagFinished(); 205 state = State.IN_EMPTY_TAG; 206 break; 207 case '>': 208 qName = getToken(); 209 onStartTagFinished(); 210 state = State.START; 211 break; 212 default: 213 tokenBuffer.append(c); 214 break; 215 } 216 break; 217 case IN_END_TAG: 218 switch (c) { 219 case '>': 220 onEndTagFinished(); 221 break; 222 default: 223 tokenBuffer.append(c); 224 break; 225 } 226 break; 227 case AFTER_START_NAME: 228 switch (c) { 229 case '/': 230 onStartTagFinished(); 231 state = State.IN_EMPTY_TAG; 232 break; 233 case '>': 234 onStartTagFinished(); 235 state = State.START; 236 break; 237 // XML 1.1 § 2.3 "White Space" 238 case ' ': 239 case '\n': 240 case '\r': 241 case '\t': 242 break; 243 // Attribute Name 244 default: 245 tokenBuffer.append(c); 246 state = State.IN_ATTRIBUTE_NAME; 247 break; 248 } 249 break; 250 case IN_ATTRIBUTE_NAME: 251 switch (c) { 252 case '=': 253 attributeName = getToken(); 254 state = State.AFTER_ATTRIBUTE_EQUALS; 255 break; 256 default: 257 tokenBuffer.append(c); 258 } 259 break; 260 case AFTER_ATTRIBUTE_EQUALS: 261 switch (c) { 262 case '\'': 263 attributeValueQuotes = AttributeValueQuotes.apos; 264 state = State.IN_ATTRIBUTE_VALUE; 265 break; 266 case '\"': 267 attributeValueQuotes = AttributeValueQuotes.quot; 268 state = State.IN_ATTRIBUTE_VALUE; 269 break; 270 default: 271 throw new IOException(); 272 } 273 break; 274 case IN_ATTRIBUTE_VALUE: 275 if (c == attributeValueQuotes.c) { 276 attributes.put(attributeName, getToken()); 277 state = State.AFTER_START_NAME; 278 } else { 279 tokenBuffer.append(c); 280 } 281 break; 282 case IN_EMPTY_TAG: 283 switch (c) { 284 case '>': 285 onEndTagFinished(); 286 break; 287 default: 288 throw new IOException(); 289 } 290 break; 291 case IN_PROCESSING_INSTRUCTION_OR_DECLARATION: 292 switch (c) { 293 case '\'': 294 attributeValueQuotes = AttributeValueQuotes.apos; 295 state = State.IN_PROCESSING_INSTRUCTION_OR_DECLARATION_PSEUDO_ATTRIBUTE_VALUE; 296 break; 297 case '\"': 298 attributeValueQuotes = AttributeValueQuotes.quot; 299 state = State.IN_PROCESSING_INSTRUCTION_OR_DECLARATION_PSEUDO_ATTRIBUTE_VALUE; 300 break; 301 case '?': 302 state = State.IN_PROCESSING_INSTRUCTION_OR_DECLARATION_QUESTION_MARK; 303 break; 304 } 305 break; 306 case IN_PROCESSING_INSTRUCTION_OR_DECLARATION_PSEUDO_ATTRIBUTE_VALUE: 307 if (c == attributeValueQuotes.c) { 308 state = State.IN_PROCESSING_INSTRUCTION_OR_DECLARATION; 309 } 310 break; 311 case IN_PROCESSING_INSTRUCTION_OR_DECLARATION_QUESTION_MARK: 312 if (c == '>') { 313 String processingInstructionOrDeclaration = splittedPartBuffer.toString(); 314 onProcessingInstructionOrDeclaration(processingInstructionOrDeclaration); 315 newSplittedPart(); 316 } else { 317 state = State.IN_PROCESSING_INSTRUCTION_OR_DECLARATION; 318 } 319 break; 320 case AFTER_COMMENT_BANG: 321 case AFTER_COMMENT_DASH1: 322 case AFTER_COMMENT_DASH2: 323 case AFTER_COMMENT: 324 case AFTER_COMMENT_CLOSING_DASH1: 325 case AFTER_COMMENT_CLOSING_DASH2: 326 throw new UnsupportedOperationException(); 327 } 328 } 329 330 private void onStartTagFinished() { 331 // qName should already be set correctly. 332 depth++; 333 String prefix = extractPrefix(qName); 334 String localpart = extractLocalpart(qName); 335 onStartTag(prefix, localpart, attributes); 336 attributes.clear(); 337 } 338 339 private void onEndTagFinished() { 340 String endTagName = getToken(); 341 if (endTagName.length() == 0) { 342 // empty element case 343 endTagName = qName; 344 } 345 depth--; 346 if (depth == 0) { 347 String completeElement = splittedPartBuffer.toString(); 348 splittedPartBuffer.setLength(0); 349 completeElementCallback.onCompleteElement(completeElement); 350 } 351 onEndTag(endTagName); 352 353 assert state != State.START; 354 state = State.START; 355 } 356 357 private String getToken() { 358 String token = tokenBuffer.toString(); 359 tokenBuffer.setLength(0); 360 return token; 361 } 362 363 private void onProcessingInstructionOrDeclaration(String processingInstructionOrDeclaration) { 364 if (processingInstructionOrDeclaration.startsWith("<?xml ")) { 365 if (declarationCallback != null) { 366 declarationCallback.onDeclaration(processingInstructionOrDeclaration); 367 } 368 } else { 369 if (processingInstructionCallback != null) { 370 processingInstructionCallback.onProcessingInstruction(processingInstructionOrDeclaration); 371 } 372 } 373 } 374 375 private static String extractPrefix(String qName) { 376 int index = qName.indexOf(':'); 377 return index > -1 ? qName.substring(0, index) : qName; 378 } 379 380 private static String extractLocalpart(String qName) { 381 int index = qName.indexOf(':'); 382 return index > -1 ? qName.substring(index + 1) : qName; 383 } 384}