001/** 002 * 003 * Copyright © 2014-2019 Florian Schmaus 004 * 005 * Licensed under the Apache License, Version 2.0 (the "License"); 006 * you may not use this file except in compliance with the License. 007 * You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.jxmpp.util; 018 019import org.jxmpp.util.cache.LruCache; 020 021/** 022 * Utility class for handling Strings in XMPP. 023 */ 024public class XmppStringUtils { 025 026 /** 027 * Returns the localpart of an XMPP address (JID). For example, for the address "user@xmpp.org/Resource", "user" 028 * would be returned. Returns <code>null</code> if the given JID has no localpart. Returns the empty string if 029 * the given JIDs localpart is the empty string (which is invalid). 030 * 031 * @param jid 032 * the XMPP address to parse. 033 * @return the name portion of the XMPP address, the empty String or <code>null</code>. 034 */ 035 public static String parseLocalpart(String jid) { 036 int atIndex = jid.indexOf('@'); 037 if (atIndex < 0) { 038 return null; 039 } 040 if (atIndex == 0) { 041 // '@' as first character, i.e. '@example.org". Return emtpy string as 042 // localpart, to make it possible to differentiate this from 'example.org' 043 // (which would return 'null' as localpart). 044 return ""; 045 } 046 047 int slashIndex = jid.indexOf('/'); 048 if (slashIndex >= 0 && slashIndex < atIndex) { 049 // This is an '@' character in the resourcepart. 050 return null; 051 } 052 053 return jid.substring(0, atIndex); 054 } 055 056 /** 057 * Returns the domain of an XMPP address (JID). For example, for the address "user@xmpp.org/Resource", "xmpp.org" 058 * would be returned. If <code>jid</code> is <code>null</code>, then this method returns also <code>null</code>. If 059 * the input String is no valid JID or has no domainpart, then this method will return the empty String. 060 * 061 * @param jid 062 * the XMPP address to parse. 063 * @return the domainpart of the XMPP address, the empty String or <code>null</code>. 064 */ 065 public static String parseDomain(String jid) { 066 if (jid == null) return null; 067 068 int atIndex = jid.indexOf('@'); 069 int slashIndex = jid.indexOf('/'); 070 if (slashIndex >= 0) { 071 // 'local@domain.foo/resource' and 'local@domain.foo/res@otherres' case 072 if (slashIndex > atIndex) { 073 return jid.substring(atIndex + 1, slashIndex); 074 // 'domain.foo/res@otherres' case 075 } else { 076 return jid.substring(0, slashIndex); 077 } 078 } else { 079 return jid.substring(atIndex + 1); 080 } 081 } 082 083 /** 084 * Returns the resource portion of an XMPP address (JID). For example, for the address "user@xmpp.org/Resource", 085 * "Resource" would be returned. Returns <code>null</code> if the given JID has no resourcepart. Returns the 086 * empty string if the given JID has an empty resourcepart (which is invalid). 087 * 088 * @param jid 089 * the XMPP address to parse. 090 * @return the resource portion of the XMPP address. 091 */ 092 public static String parseResource(String jid) { 093 int slashIndex = jid.indexOf('/'); 094 if (slashIndex < 0) { 095 return null; 096 } 097 if (slashIndex + 1 > jid.length()) { 098 return ""; 099 } else { 100 return jid.substring(slashIndex + 1); 101 } 102 } 103 104 /** 105 * Returns the JID with any resource information removed. For example, for 106 * the address "matt@jivesoftware.com/Smack", "matt@jivesoftware.com" would 107 * be returned. 108 * 109 * @param jid 110 * the XMPP JID. 111 * @return the bare XMPP JID without resource information. 112 */ 113 public static String parseBareJid(String jid) { 114 int slashIndex = jid.indexOf('/'); 115 if (slashIndex < 0) { 116 return jid; 117 } else if (slashIndex == 0) { 118 return ""; 119 } else { 120 return jid.substring(0, slashIndex); 121 } 122 } 123 124 /** 125 * Returns true if jid is a full JID (i.e. a JID with resource part). 126 * 127 * @param jid the String to check. 128 * @return true if full JID, false otherwise 129 */ 130 public static boolean isFullJID(String jid) { 131 if (parseLocalpart(jid).length() <= 0 || parseDomain(jid).length() <= 0 132 || parseResource(jid).length() <= 0) { 133 return false; 134 } 135 return true; 136 } 137 138 /** 139 * Returns true if <code>jid</code> is a bare JID ("foo@bar.com"). 140 * <p> 141 * This method may return true for Strings that are not valid JIDs (e.g. because of Stringprep violations). Consider 142 * using <code>org.jxmpp.jid.util.JidUtil.validateBareJid(String)</code> from jxmpp-jid instead of this method as it 143 * exceptions provide a meaningful message string why the JID is not a bare JID and will also check for Stringprep 144 * errors. 145 * </p> 146 * 147 * @param jid the String to check. 148 * @return true if bare JID, false otherwise 149 */ 150 public static boolean isBareJid(String jid) { 151 return parseLocalpart(jid).length() > 0 152 && parseDomain(jid).length() > 0 153 && parseResource(jid).length() == 0; 154 } 155 156 private static final LruCache<String, String> LOCALPART_ESACPE_CACHE = new LruCache<String, String>(100); 157 private static final LruCache<String, String> LOCALPART_UNESCAPE_CACHE = new LruCache<String, String>(100); 158 159 /** 160 * Escapes the localpart of a JID according to "JID Escaping" (XEP-0106). 161 * Escaping replaces characters prohibited by Nodeprep with escape sequences, 162 * as follows: 163 * <table border="1"> 164 * <caption>Character mappings</caption> 165 * <tr><td><b>Unescaped Character</b></td><td><b>Encoded Sequence</b></td></tr> 166 * <tr><td><space></td><td>\20</td></tr> 167 * <tr><td>"</td><td>\22</td></tr> 168 * <tr><td>&</td><td>\26</td></tr> 169 * <tr><td>'</td><td>\27</td></tr> 170 * <tr><td>/</td><td>\2f</td></tr> 171 * <tr><td>:</td><td>\3a</td></tr> 172 * <tr><td><</td><td>\3c</td></tr> 173 * <tr><td>></td><td>\3e</td></tr> 174 * <tr><td>@</td><td>\40</td></tr> 175 * <tr><td>\</td><td>\5c</td></tr> 176 * </table> 177 * 178 * <p> 179 * This process is useful when the localpart comes from an external source that doesn't 180 * conform to Nodeprep. For example, a username in LDAP may be "Joe Smith". Because 181 * the <space> character isn't a valid part of a localpart, the username should 182 * be escaped to "Joe\20Smith" before being made into a JID (e.g. "joe\20smith@example.com" 183 * after case-folding, etc. has been applied). 184 * </p> 185 * 186 * All localpart escaping and un-escaping must be performed manually at the appropriate 187 * time; the JID class will not escape or un-escape automatically. 188 * 189 * @param localpart the localpart. 190 * @return the escaped version of the localpart. 191 * @see <a href="http://xmpp.org/extensions/xep-0106.html">XEP-106: JID Escaping</a> 192 */ 193 public static String escapeLocalpart(String localpart) { 194 if (localpart == null) { 195 return null; 196 } 197 String res = LOCALPART_ESACPE_CACHE.lookup(localpart); 198 if (res != null) { 199 return res; 200 } 201 StringBuilder buf = new StringBuilder(localpart.length() + 8); 202 for (int i = 0, n = localpart.length(); i < n; i++) { 203 char c = localpart.charAt(i); 204 switch (c) { 205 case '"': 206 buf.append("\\22"); 207 break; 208 case '&': 209 buf.append("\\26"); 210 break; 211 case '\'': 212 buf.append("\\27"); 213 break; 214 case '/': 215 buf.append("\\2f"); 216 break; 217 case ':': 218 buf.append("\\3a"); 219 break; 220 case '<': 221 buf.append("\\3c"); 222 break; 223 case '>': 224 buf.append("\\3e"); 225 break; 226 case '@': 227 buf.append("\\40"); 228 break; 229 case '\\': 230 buf.append("\\5c"); 231 break; 232 default: { 233 if (Character.isWhitespace(c)) { 234 buf.append("\\20"); 235 } else { 236 buf.append(c); 237 } 238 } 239 } 240 } 241 res = buf.toString(); 242 LOCALPART_ESACPE_CACHE.put(localpart, res); 243 return res; 244 } 245 246 /** 247 * Un-escapes the localpart of a JID according to "JID Escaping" (XEP-0106). 248 * Escaping replaces characters prohibited by Nodeprep with escape sequences, 249 * as follows: 250 * 251 * <table border="1"> 252 * <caption>Character mapping</caption> 253 * <tr><td><b>Unescaped Character</b></td><td><b>Encoded Sequence</b></td></tr> 254 * <tr><td><space></td><td>\20</td></tr> 255 * <tr><td>"</td><td>\22</td></tr> 256 * <tr><td>&</td><td>\26</td></tr> 257 * <tr><td>'</td><td>\27</td></tr> 258 * <tr><td>/</td><td>\2f</td></tr> 259 * <tr><td>:</td><td>\3a</td></tr> 260 * <tr><td><</td><td>\3c</td></tr> 261 * <tr><td>></td><td>\3e</td></tr> 262 * <tr><td>@</td><td>\40</td></tr> 263 * <tr><td>\</td><td>\5c</td></tr> 264 * </table> 265 * 266 * <p> 267 * This process is useful when the localpart comes from an external source that doesn't 268 * conform to Nodeprep. For example, a username in LDAP may be "Joe Smith". Because 269 * the <space> character isn't a valid part of a localpart, the username should 270 * be escaped to "Joe\20Smith" before being made into a JID (e.g. "joe\20smith@example.com" 271 * after case-folding, etc. has been applied). 272 * </p> 273 * 274 * All localpart escaping and un-escaping must be performed manually at the appropriate 275 * time; the JID class will not escape or un-escape automatically. 276 * 277 * @param localpart the escaped version of the localpart. 278 * @return the un-escaped version of the localpart. 279 * @see <a href="http://xmpp.org/extensions/xep-0106.html">XEP-106: JID Escaping</a> 280 */ 281 public static String unescapeLocalpart(String localpart) { 282 if (localpart == null) { 283 return null; 284 } 285 String res = LOCALPART_UNESCAPE_CACHE.lookup(localpart); 286 if (res != null) { 287 return res; 288 } 289 char[] localpartChars = localpart.toCharArray(); 290 StringBuilder buf = new StringBuilder(localpartChars.length); 291 for (int i = 0, n = localpartChars.length; i < n; i++) { 292 compare: { 293 char c = localpart.charAt(i); 294 if (c == '\\' && i + 2 < n) { 295 char c2 = localpartChars[i + 1]; 296 char c3 = localpartChars[i + 2]; 297 switch(c2) { 298 case '2': 299 switch (c3) { 300 case '0': 301 buf.append(' '); 302 i += 2; 303 break compare; 304 case '2': 305 buf.append('"'); 306 i += 2; 307 break compare; 308 case '6': 309 buf.append('&'); 310 i += 2; 311 break compare; 312 case '7': 313 buf.append('\''); 314 i += 2; 315 break compare; 316 case 'f': 317 buf.append('/'); 318 i += 2; 319 break compare; 320 } 321 break; 322 case '3': 323 switch (c3) { 324 case 'a': 325 buf.append(':'); 326 i += 2; 327 break compare; 328 case 'c': 329 buf.append('<'); 330 i += 2; 331 break compare; 332 case 'e': 333 buf.append('>'); 334 i += 2; 335 break compare; 336 } 337 break; 338 case '4': 339 if (c3 == '0') { 340 buf.append("@"); 341 i += 2; 342 break compare; 343 } 344 break; 345 case '5': 346 if (c3 == 'c') { 347 buf.append("\\"); 348 i += 2; 349 break compare; 350 } 351 break; 352 } 353 } 354 buf.append(c); 355 } 356 } 357 res = buf.toString(); 358 LOCALPART_UNESCAPE_CACHE.put(localpart, res); 359 return res; 360 } 361 362 /** 363 * Construct a JID String from the given parts. 364 * 365 * @param localpart the localpart. 366 * @param domainpart the domainpart. 367 * @return the constructed JID String. 368 */ 369 public static String completeJidFrom(CharSequence localpart, CharSequence domainpart) { 370 return completeJidFrom(localpart != null ? localpart.toString() : null, domainpart.toString()); 371 } 372 373 /** 374 * Construct a JID String from the given parts. 375 * 376 * @param localpart the localpart. 377 * @param domainpart the domainpart. 378 * @return the constructed JID String. 379 */ 380 public static String completeJidFrom(String localpart, String domainpart) { 381 return completeJidFrom(localpart, domainpart, null); 382 } 383 384 /** 385 * Construct a JID String from the given parts. 386 * 387 * @param localpart the localpart. 388 * @param domainpart the domainpart. 389 * @param resource the resourcepart. 390 * @return the constructed JID String. 391 */ 392 public static String completeJidFrom(CharSequence localpart, CharSequence domainpart, CharSequence resource) { 393 return completeJidFrom(localpart != null ? localpart.toString() : null, domainpart.toString(), 394 resource != null ? resource.toString() : null); 395 } 396 397 /** 398 * Construct a JID String from the given parts. 399 * 400 * @param localpart the localpart. 401 * @param domainpart the domainpart. 402 * @param resource the resourcepart. 403 * @return the constructed JID String. 404 */ 405 public static String completeJidFrom(String localpart, String domainpart, String resource) { 406 if (domainpart == null) { 407 throw new IllegalArgumentException("domainpart must not be null"); 408 } 409 int localpartLength = localpart != null ? localpart.length() : 0; 410 int domainpartLength = domainpart.length(); 411 int resourceLength = resource != null ? resource.length() : 0; 412 int maxResLength = localpartLength + domainpartLength + resourceLength + 2; 413 StringBuilder sb = new StringBuilder(maxResLength); 414 if (localpartLength > 0) { 415 sb.append(localpart).append('@'); 416 } 417 sb.append(domainpart); 418 if (resourceLength > 0) { 419 sb.append('/').append(resource); 420 } 421 return sb.toString(); 422 } 423 424 /** 425 * Generate a unique key from a element name and namespace. This key can be used to lookup element/namespace 426 * information. The key is simply generated by concatenating the strings as follows: 427 * <code>element + '\t' + namespace</code>. 428 * <p> 429 * The tab character (\t) was chosen because it will be normalized, i.e. replace by space, in attribute values. It 430 * therefore should never appear in <code>element</code> or <code>namespace</code>. For more information about the 431 * normalization, see the XML specification § <a href="http://www.w3.org/TR/REC-xml/#AVNormalize">3.3.3 432 * Attribute-Value Normalization</a>. 433 * </p> 434 * 435 * @param element the element. 436 * @param namespace the namespace. 437 * @return the unique key of element and namespace. 438 */ 439 public static String generateKey(String element, String namespace) { 440 return element + '\t' + namespace; 441 } 442}