001/**
002 *
003 * Copyright © 2014-2019 Florian Schmaus
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.jxmpp.util;
018
019import org.jxmpp.util.cache.LruCache;
020
021/**
022 * Utility class for handling Strings in XMPP.
023 */
024public class XmppStringUtils {
025
026        /**
027         * Returns the localpart of an XMPP address (JID). For example, for the address "user@xmpp.org/Resource", "user"
028         * would be returned. Returns <code>null</code> if the given JID has no localpart. Returns the empty string if
029         * the given JIDs localpart is the empty string (which is invalid).
030         * 
031         * @param jid
032         *            the XMPP address to parse.
033         * @return the name portion of the XMPP address, the empty String or <code>null</code>.
034         */
035        public static String parseLocalpart(String jid) {
036                int atIndex = jid.indexOf('@');
037                if (atIndex < 0) {
038                        return null;
039                }
040                if (atIndex == 0) {
041                        // '@' as first character, i.e. '@example.org". Return empty string as
042                        // localpart, to make it possible to differentiate this from 'example.org'
043                        // (which would return 'null' as localpart).
044                        return "";
045                }
046
047                int slashIndex = jid.indexOf('/');
048                if (slashIndex >= 0 && slashIndex < atIndex) {
049                        // This is an '@' character in the resourcepart.
050                        return null;
051                }
052
053                return jid.substring(0, atIndex);
054        }
055
056        /**
057         * Returns the domain of an XMPP address (JID). For example, for the address "user@xmpp.org/Resource", "xmpp.org"
058         * would be returned. If <code>jid</code> is <code>null</code>, then this method returns also <code>null</code>. If
059         * the input String is no valid JID or has no domainpart, then this method will return the empty String.
060         * 
061         * @param jid
062         *            the XMPP address to parse.
063         * @return the domainpart of the XMPP address, the empty String or <code>null</code>.
064         */
065        public static String parseDomain(String jid) {
066                if (jid == null) return null;
067
068                int atIndex = jid.indexOf('@');
069                int slashIndex = jid.indexOf('/');
070                if (slashIndex >= 0) {
071                        // 'local@domain.foo/resource' and 'local@domain.foo/res@otherres' case
072                        if (slashIndex > atIndex) {
073                                return jid.substring(atIndex + 1, slashIndex);
074                        // 'domain.foo/res@otherres' case
075                        } else {
076                                return jid.substring(0, slashIndex);
077                        }
078                } else {
079                        return jid.substring(atIndex + 1);
080                }
081        }
082
083        /**
084         * Returns the resource portion of an XMPP address (JID). For example, for the address "user@xmpp.org/Resource",
085         * "Resource" would be returned. Returns <code>null</code> if the given JID has no resourcepart. Returns the
086         * empty string if the given JID has an empty resourcepart (which is invalid).
087         * 
088         * @param jid
089         *            the XMPP address to parse.
090         * @return the resource portion of the XMPP address.
091         */
092        public static String parseResource(String jid) {
093                int slashIndex = jid.indexOf('/');
094                if (slashIndex < 0) {
095                        return null;
096                }
097                if (slashIndex + 1 > jid.length()) {
098                        return "";
099                } else {
100                        return jid.substring(slashIndex + 1);
101                }
102        }
103
104        /**
105         * Returns the JID with any resource information removed. For example, for
106         * the address "matt@jivesoftware.com/Smack", "matt@jivesoftware.com" would
107         * be returned.
108         * 
109         * @param jid
110         *            the XMPP JID.
111         * @return the bare XMPP JID without resource information.
112         */
113        public static String parseBareJid(String jid) {
114                int slashIndex = jid.indexOf('/');
115                if (slashIndex < 0) {
116                        return jid;
117                } else if (slashIndex == 0) {
118                        return "";
119                } else {
120                        return jid.substring(0, slashIndex);
121                }
122        }
123
124        /**
125         * Returns true if jid is a full JID (i.e. a JID with resource part).
126         * 
127         * @param jid the String to check.
128         * @return true if full JID, false otherwise
129         */
130        public static boolean isFullJID(String jid) {
131                String domain = parseDomain(jid);
132                String resource = parseResource(jid);
133                if ((domain == null || domain.length() <= 0)
134                                || (resource == null || resource.length() <= 0)) {
135                        return false;
136                }
137                return true;
138        }
139
140        /**
141         * Returns true if <code>jid</code> is a bare JID ("foo@bar.com").
142         * <p>
143         * This method may return true for Strings that are not valid JIDs (e.g. because of Stringprep violations). Consider
144         * using <code>org.jxmpp.jid.util.JidUtil.validateBareJid(String)</code> from jxmpp-jid instead of this method as it
145         * exceptions provide a meaningful message string why the JID is not a bare JID and will also check for Stringprep
146         * errors.
147         * </p>
148         *
149         * @param jid the String to check.
150         * @return true if bare JID, false otherwise
151         */
152        public static boolean isBareJid(String jid) {
153                String domain = parseDomain(jid);
154                String resource = parseResource(jid);
155                return (domain != null && domain.length() > 0
156                                && (resource == null || resource.length() == 0));
157        }
158
159        private static final LruCache<String, String> LOCALPART_ESCAPE_CACHE = new LruCache<String, String>(100);
160        private static final LruCache<String, String> LOCALPART_UNESCAPE_CACHE = new LruCache<String, String>(100);
161
162        /**
163         * Escapes the localpart of a JID according to "JID Escaping" (XEP-0106).
164         * Escaping replaces characters prohibited by Nodeprep with escape sequences,
165         * as follows:
166         * <table border="1">
167         * <caption>Character mappings</caption>
168         * <tr><td><b>Unescaped Character</b></td><td><b>Encoded Sequence</b></td></tr>
169         * <tr><td>&lt;space&gt;</td><td>\20</td></tr>
170         * <tr><td>"</td><td>\22</td></tr>
171         * <tr><td>&amp;</td><td>\26</td></tr>
172         * <tr><td>'</td><td>\27</td></tr>
173         * <tr><td>/</td><td>\2f</td></tr>
174         * <tr><td>:</td><td>\3a</td></tr>
175         * <tr><td>&lt;</td><td>\3c</td></tr>
176         * <tr><td>&gt;</td><td>\3e</td></tr>
177         * <tr><td>@</td><td>\40</td></tr>
178         * <tr><td>\</td><td>\5c</td></tr>
179         * </table>
180         *
181         * <p>
182         * This process is useful when the localpart comes from an external source that doesn't
183         * conform to Nodeprep. For example, a username in LDAP may be "Joe Smith". Because
184         * the &lt;space&gt; character isn't a valid part of a localpart, the username should
185         * be escaped to "Joe\20Smith" before being made into a JID (e.g. "joe\20smith@example.com"
186         * after case-folding, etc. has been applied).
187         * </p>
188         *
189         * All localpart escaping and un-escaping must be performed manually at the appropriate
190         * time; the JID class will not escape or un-escape automatically.
191         *
192         * @param localpart the localpart.
193         * @return the escaped version of the localpart.
194         * @see <a href="http://xmpp.org/extensions/xep-0106.html">XEP-106: JID Escaping</a>
195         */
196        public static String escapeLocalpart(String localpart) {
197                if (localpart == null) {
198                        return null;
199                }
200                String res = LOCALPART_ESCAPE_CACHE.lookup(localpart);
201                if (res != null) {
202                        return res;
203                }
204                StringBuilder buf = new StringBuilder(localpart.length() + 8);
205                for (int i = 0, n = localpart.length(); i < n; i++) {
206                        char c = localpart.charAt(i);
207                        switch (c) {
208                        case '"':
209                                buf.append("\\22");
210                                break;
211                        case '&':
212                                buf.append("\\26");
213                                break;
214                        case '\'':
215                                buf.append("\\27");
216                                break;
217                        case '/':
218                                buf.append("\\2f");
219                                break;
220                        case ':':
221                                buf.append("\\3a");
222                                break;
223                        case '<':
224                                buf.append("\\3c");
225                                break;
226                        case '>':
227                                buf.append("\\3e");
228                                break;
229                        case '@':
230                                buf.append("\\40");
231                                break;
232                        case '\\':
233                                buf.append("\\5c");
234                                break;
235                        default: {
236                                if (Character.isWhitespace(c)) {
237                                        buf.append("\\20");
238                                } else {
239                                        buf.append(c);
240                                }
241                        }
242                        }
243                }
244                res = buf.toString();
245                LOCALPART_ESCAPE_CACHE.put(localpart, res);
246                return res;
247        }
248
249        /**
250         * Un-escapes the localpart of a JID according to "JID Escaping" (XEP-0106).
251         * Escaping replaces characters prohibited by Nodeprep with escape sequences,
252         * as follows:
253         * 
254         * <table border="1">
255         * <caption>Character mapping</caption>
256         * <tr><td><b>Unescaped Character</b></td><td><b>Encoded Sequence</b></td></tr>
257         * <tr><td>&lt;space&gt;</td><td>\20</td></tr>
258         * <tr><td>"</td><td>\22</td></tr>
259         * <tr><td>&amp;</td><td>\26</td></tr>
260         * <tr><td>'</td><td>\27</td></tr>
261         * <tr><td>/</td><td>\2f</td></tr>
262         * <tr><td>:</td><td>\3a</td></tr>
263         * <tr><td>&lt;</td><td>\3c</td></tr>
264         * <tr><td>&gt;</td><td>\3e</td></tr>
265         * <tr><td>@</td><td>\40</td></tr>
266         * <tr><td>\</td><td>\5c</td></tr>
267         * </table>
268         *
269         * <p>
270         * This process is useful when the localpart comes from an external source that doesn't
271         * conform to Nodeprep. For example, a username in LDAP may be "Joe Smith". Because
272         * the &lt;space&gt; character isn't a valid part of a localpart, the username should
273         * be escaped to "Joe\20Smith" before being made into a JID (e.g. "joe\20smith@example.com"
274         * after case-folding, etc. has been applied).
275         * </p>
276         *
277         * All localpart escaping and un-escaping must be performed manually at the appropriate
278         * time; the JID class will not escape or un-escape automatically.
279         *
280         * @param localpart the escaped version of the localpart.
281         * @return the un-escaped version of the localpart.
282         * @see <a href="http://xmpp.org/extensions/xep-0106.html">XEP-106: JID Escaping</a>
283         */
284        public static String unescapeLocalpart(String localpart) {
285                if (localpart == null) {
286                        return null;
287                }
288                String res = LOCALPART_UNESCAPE_CACHE.lookup(localpart);
289                if (res != null) {
290                        return res;
291                }
292                char[] localpartChars = localpart.toCharArray();
293                StringBuilder buf = new StringBuilder(localpartChars.length);
294                for (int i = 0, n = localpartChars.length; i < n; i++) {
295                        compare: {
296                                char c = localpart.charAt(i);
297                                if (c == '\\' && i + 2 < n) {
298                                        char c2 = localpartChars[i + 1];
299                                        char c3 = localpartChars[i + 2];
300                                        switch(c2) {
301                                        case '2':
302                                                switch (c3) {
303                                                case '0':
304                                                        buf.append(' ');
305                                                        i += 2;
306                                                        break compare;
307                                                case '2':
308                                                        buf.append('"');
309                                                        i += 2;
310                                                        break compare;
311                                                case '6':
312                                                        buf.append('&');
313                                                        i += 2;
314                                                        break compare;
315                                                case '7':
316                                                        buf.append('\'');
317                                                        i += 2;
318                                                        break compare;
319                                                case 'f':
320                                                        buf.append('/');
321                                                        i += 2;
322                                                        break compare;
323                                                }
324                                                break;
325                                        case '3':
326                                                switch (c3) {
327                                                case 'a':
328                                                        buf.append(':');
329                                                        i += 2;
330                                                        break compare;
331                                                case 'c':
332                                                        buf.append('<');
333                                                        i += 2;
334                                                        break compare;
335                                                case 'e':
336                                                        buf.append('>');
337                                                        i += 2;
338                                                        break compare;
339                                                }
340                                                break;
341                                        case '4':
342                                                if (c3 == '0') {
343                                                        buf.append("@");
344                                                        i += 2;
345                                                        break compare;
346                                                }
347                                                break;
348                                        case '5':
349                                                if (c3 == 'c') {
350                                                        buf.append("\\");
351                                                        i += 2;
352                                                        break compare;
353                                                }
354                                                break;
355                                        }
356                                }
357                                buf.append(c);
358                        }
359                }
360                res = buf.toString();
361                LOCALPART_UNESCAPE_CACHE.put(localpart, res);
362                return res;
363        }
364
365        /**
366         * Construct a JID String from the given parts.
367         *
368         * @param localpart the localpart.
369         * @param domainpart the domainpart.
370         * @return the constructed JID String.
371         */
372        public static String completeJidFrom(CharSequence localpart, CharSequence domainpart) {
373                return completeJidFrom(localpart != null ? localpart.toString() : null, domainpart.toString());
374        }
375
376        /**
377         * Construct a JID String from the given parts.
378         *
379         * @param localpart the localpart.
380         * @param domainpart the domainpart.
381         * @return the constructed JID String.
382         */
383        public static String completeJidFrom(String localpart, String domainpart) {
384                return completeJidFrom(localpart, domainpart, null);
385        }
386
387        /**
388         * Construct a JID String from the given parts.
389         *
390         * @param localpart the localpart.
391         * @param domainpart the domainpart.
392         * @param resource the resourcepart.
393         * @return the constructed JID String.
394         */
395        public static String completeJidFrom(CharSequence localpart, CharSequence domainpart, CharSequence resource) {
396                return completeJidFrom(localpart != null ? localpart.toString() : null, domainpart.toString(),
397                                resource != null ? resource.toString() : null);
398        }
399
400        /**
401         * Construct a JID String from the given parts.
402         *
403         * @param localpart the localpart.
404         * @param domainpart the domainpart.
405         * @param resource the resourcepart.
406         * @return the constructed JID String.
407         */
408        public static String completeJidFrom(String localpart, String domainpart, String resource) {
409                if (domainpart == null) {
410                        throw new IllegalArgumentException("domainpart must not be null");
411                }
412                int localpartLength = localpart != null ? localpart.length() : 0;
413                int domainpartLength = domainpart.length();
414                int resourceLength = resource != null ? resource.length() : 0;
415                int maxResLength = localpartLength + domainpartLength + resourceLength + 2;
416                StringBuilder sb = new StringBuilder(maxResLength);
417                if (localpartLength > 0) {
418                        sb.append(localpart).append('@');
419                }
420                sb.append(domainpart);
421                if (resourceLength > 0) {
422                        sb.append('/').append(resource);
423                }
424                return sb.toString();
425        }
426
427        /**
428         * Generate a unique key from a element name and namespace. This key can be used to lookup element/namespace
429         * information. The key is simply generated by concatenating the strings as follows:
430         * <code>element + '\t' + namespace</code>.
431         * <p>
432         * The tab character (\t) was chosen because it will be normalized, i.e. replace by space, in attribute values. It
433         * therefore should never appear in <code>element</code> or <code>namespace</code>. For more information about the
434         * normalization, see the XML specification § <a href="http://www.w3.org/TR/REC-xml/#AVNormalize">3.3.3
435         * Attribute-Value Normalization</a>.
436         * </p>
437         * 
438         * @param element the element.
439         * @param namespace the namespace.
440         * @return the unique key of element and namespace.
441         */
442        public static String generateKey(String element, String namespace) {
443                return element + '\t' + namespace;
444        }
445}