001/**
002 *
003 * Copyright © 2014-2019 Florian Schmaus
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.jxmpp.util;
018
019import org.jxmpp.util.cache.LruCache;
020
021/**
022 * Utility class for handling Strings in XMPP.
023 */
024public class XmppStringUtils {
025
026        /**
027         * Returns the localpart of an XMPP address (JID). For example, for the address "user@xmpp.org/Resource", "user"
028         * would be returned. Returns <code>null</code> if the given JID has no localpart. Returns the empty string if
029         * the given JIDs localpart is the empty string (which is invalid).
030         * 
031         * @param jid
032         *            the XMPP address to parse.
033         * @return the name portion of the XMPP address, the empty String or <code>null</code>.
034         */
035        public static String parseLocalpart(String jid) {
036                int atIndex = jid.indexOf('@');
037                if (atIndex < 0) {
038                        return null;
039                }
040                if (atIndex == 0) {
041                        // '@' as first character, i.e. '@example.org". Return emtpy string as
042                        // localpart, to make it possible to differentiate this from 'example.org'
043                        // (which would return 'null' as localpart).
044                        return "";
045                }
046
047                int slashIndex = jid.indexOf('/');
048                if (slashIndex >= 0 && slashIndex < atIndex) {
049                        // This is an '@' character in the resourcepart.
050                        return null;
051                }
052
053                return jid.substring(0, atIndex);
054        }
055
056        /**
057         * Returns the domain of an XMPP address (JID). For example, for the address "user@xmpp.org/Resource", "xmpp.org"
058         * would be returned. If <code>jid</code> is <code>null</code>, then this method returns also <code>null</code>. If
059         * the input String is no valid JID or has no domainpart, then this method will return the empty String.
060         * 
061         * @param jid
062         *            the XMPP address to parse.
063         * @return the domainpart of the XMPP address, the empty String or <code>null</code>.
064         */
065        public static String parseDomain(String jid) {
066                if (jid == null) return null;
067
068                int atIndex = jid.indexOf('@');
069                int slashIndex = jid.indexOf('/');
070                if (slashIndex >= 0) {
071                        // 'local@domain.foo/resource' and 'local@domain.foo/res@otherres' case
072                        if (slashIndex > atIndex) {
073                                return jid.substring(atIndex + 1, slashIndex);
074                        // 'domain.foo/res@otherres' case
075                        } else {
076                                return jid.substring(0, slashIndex);
077                        }
078                } else {
079                        return jid.substring(atIndex + 1);
080                }
081        }
082
083        /**
084         * Returns the resource portion of an XMPP address (JID). For example, for the address "user@xmpp.org/Resource",
085         * "Resource" would be returned. Returns <code>null</code> if the given JID has no resourcepart. Returns the
086         * empty string if the given JID has an empty resourcepart (which is invalid).
087         * 
088         * @param jid
089         *            the XMPP address to parse.
090         * @return the resource portion of the XMPP address.
091         */
092        public static String parseResource(String jid) {
093                int slashIndex = jid.indexOf('/');
094                if (slashIndex < 0) {
095                        return null;
096                }
097                if (slashIndex + 1 > jid.length()) {
098                        return "";
099                } else {
100                        return jid.substring(slashIndex + 1);
101                }
102        }
103
104        /**
105         * Returns the JID with any resource information removed. For example, for
106         * the address "matt@jivesoftware.com/Smack", "matt@jivesoftware.com" would
107         * be returned.
108         * 
109         * @param jid
110         *            the XMPP JID.
111         * @return the bare XMPP JID without resource information.
112         */
113        public static String parseBareJid(String jid) {
114                int slashIndex = jid.indexOf('/');
115                if (slashIndex < 0) {
116                        return jid;
117                } else if (slashIndex == 0) {
118                        return "";
119                } else {
120                        return jid.substring(0, slashIndex);
121                }
122        }
123
124        /**
125         * Returns true if jid is a full JID (i.e. a JID with resource part).
126         * 
127         * @param jid the String to check.
128         * @return true if full JID, false otherwise
129         */
130        public static boolean isFullJID(String jid) {
131                if (parseLocalpart(jid).length() <= 0 || parseDomain(jid).length() <= 0
132                                || parseResource(jid).length() <= 0) {
133                        return false;
134                }
135                return true;
136        }
137
138        /**
139         * Returns true if <code>jid</code> is a bare JID ("foo@bar.com").
140         * <p>
141         * This method may return true for Strings that are not valid JIDs (e.g. because of Stringprep violations). Consider
142         * using <code>org.jxmpp.jid.util.JidUtil.validateBareJid(String)</code> from jxmpp-jid instead of this method as it
143         * exceptions provide a meaningful message string why the JID is not a bare JID and will also check for Stringprep
144         * errors.
145         * </p>
146         *
147         * @param jid the String to check.
148         * @return true if bare JID, false otherwise
149         */
150        public static boolean isBareJid(String jid) {
151                return parseLocalpart(jid).length() > 0
152                                && parseDomain(jid).length() > 0
153                                && parseResource(jid).length() == 0;
154        }
155
156        private static final LruCache<String, String> LOCALPART_ESACPE_CACHE = new LruCache<String, String>(100);
157        private static final LruCache<String, String> LOCALPART_UNESCAPE_CACHE = new LruCache<String, String>(100);
158
159        /**
160         * Escapes the localpart of a JID according to "JID Escaping" (XEP-0106).
161         * Escaping replaces characters prohibited by Nodeprep with escape sequences,
162         * as follows:
163         * <table border="1">
164         * <caption>Character mappings</caption>
165         * <tr><td><b>Unescaped Character</b></td><td><b>Encoded Sequence</b></td></tr>
166         * <tr><td>&lt;space&gt;</td><td>\20</td></tr>
167         * <tr><td>"</td><td>\22</td></tr>
168         * <tr><td>&amp;</td><td>\26</td></tr>
169         * <tr><td>'</td><td>\27</td></tr>
170         * <tr><td>/</td><td>\2f</td></tr>
171         * <tr><td>:</td><td>\3a</td></tr>
172         * <tr><td>&lt;</td><td>\3c</td></tr>
173         * <tr><td>&gt;</td><td>\3e</td></tr>
174         * <tr><td>@</td><td>\40</td></tr>
175         * <tr><td>\</td><td>\5c</td></tr>
176         * </table>
177         *
178         * <p>
179         * This process is useful when the localpart comes from an external source that doesn't
180         * conform to Nodeprep. For example, a username in LDAP may be "Joe Smith". Because
181         * the &lt;space&gt; character isn't a valid part of a localpart, the username should
182         * be escaped to "Joe\20Smith" before being made into a JID (e.g. "joe\20smith@example.com"
183         * after case-folding, etc. has been applied).
184         * </p>
185         *
186         * All localpart escaping and un-escaping must be performed manually at the appropriate
187         * time; the JID class will not escape or un-escape automatically.
188         *
189         * @param localpart the localpart.
190         * @return the escaped version of the localpart.
191         * @see <a href="http://xmpp.org/extensions/xep-0106.html">XEP-106: JID Escaping</a>
192         */
193        public static String escapeLocalpart(String localpart) {
194                if (localpart == null) {
195                        return null;
196                }
197                String res = LOCALPART_ESACPE_CACHE.lookup(localpart);
198                if (res != null) {
199                        return res;
200                }
201                StringBuilder buf = new StringBuilder(localpart.length() + 8);
202                for (int i = 0, n = localpart.length(); i < n; i++) {
203                        char c = localpart.charAt(i);
204                        switch (c) {
205                        case '"':
206                                buf.append("\\22");
207                                break;
208                        case '&':
209                                buf.append("\\26");
210                                break;
211                        case '\'':
212                                buf.append("\\27");
213                                break;
214                        case '/':
215                                buf.append("\\2f");
216                                break;
217                        case ':':
218                                buf.append("\\3a");
219                                break;
220                        case '<':
221                                buf.append("\\3c");
222                                break;
223                        case '>':
224                                buf.append("\\3e");
225                                break;
226                        case '@':
227                                buf.append("\\40");
228                                break;
229                        case '\\':
230                                buf.append("\\5c");
231                                break;
232                        default: {
233                                if (Character.isWhitespace(c)) {
234                                        buf.append("\\20");
235                                } else {
236                                        buf.append(c);
237                                }
238                        }
239                        }
240                }
241                res = buf.toString();
242                LOCALPART_ESACPE_CACHE.put(localpart, res);
243                return res;
244        }
245
246        /**
247         * Un-escapes the localpart of a JID according to "JID Escaping" (XEP-0106).
248         * Escaping replaces characters prohibited by Nodeprep with escape sequences,
249         * as follows:
250         * 
251         * <table border="1">
252         * <caption>Character mapping</caption>
253         * <tr><td><b>Unescaped Character</b></td><td><b>Encoded Sequence</b></td></tr>
254         * <tr><td>&lt;space&gt;</td><td>\20</td></tr>
255         * <tr><td>"</td><td>\22</td></tr>
256         * <tr><td>&amp;</td><td>\26</td></tr>
257         * <tr><td>'</td><td>\27</td></tr>
258         * <tr><td>/</td><td>\2f</td></tr>
259         * <tr><td>:</td><td>\3a</td></tr>
260         * <tr><td>&lt;</td><td>\3c</td></tr>
261         * <tr><td>&gt;</td><td>\3e</td></tr>
262         * <tr><td>@</td><td>\40</td></tr>
263         * <tr><td>\</td><td>\5c</td></tr>
264         * </table>
265         *
266         * <p>
267         * This process is useful when the localpart comes from an external source that doesn't
268         * conform to Nodeprep. For example, a username in LDAP may be "Joe Smith". Because
269         * the &lt;space&gt; character isn't a valid part of a localpart, the username should
270         * be escaped to "Joe\20Smith" before being made into a JID (e.g. "joe\20smith@example.com"
271         * after case-folding, etc. has been applied).
272         * </p>
273         *
274         * All localpart escaping and un-escaping must be performed manually at the appropriate
275         * time; the JID class will not escape or un-escape automatically.
276         *
277         * @param localpart the escaped version of the localpart.
278         * @return the un-escaped version of the localpart.
279         * @see <a href="http://xmpp.org/extensions/xep-0106.html">XEP-106: JID Escaping</a>
280         */
281        public static String unescapeLocalpart(String localpart) {
282                if (localpart == null) {
283                        return null;
284                }
285                String res = LOCALPART_UNESCAPE_CACHE.lookup(localpart);
286                if (res != null) {
287                        return res;
288                }
289                char[] localpartChars = localpart.toCharArray();
290                StringBuilder buf = new StringBuilder(localpartChars.length);
291                for (int i = 0, n = localpartChars.length; i < n; i++) {
292                        compare: {
293                                char c = localpart.charAt(i);
294                                if (c == '\\' && i + 2 < n) {
295                                        char c2 = localpartChars[i + 1];
296                                        char c3 = localpartChars[i + 2];
297                                        switch(c2) {
298                                        case '2':
299                                                switch (c3) {
300                                                case '0':
301                                                        buf.append(' ');
302                                                        i += 2;
303                                                        break compare;
304                                                case '2':
305                                                        buf.append('"');
306                                                        i += 2;
307                                                        break compare;
308                                                case '6':
309                                                        buf.append('&');
310                                                        i += 2;
311                                                        break compare;
312                                                case '7':
313                                                        buf.append('\'');
314                                                        i += 2;
315                                                        break compare;
316                                                case 'f':
317                                                        buf.append('/');
318                                                        i += 2;
319                                                        break compare;
320                                                }
321                                                break;
322                                        case '3':
323                                                switch (c3) {
324                                                case 'a':
325                                                        buf.append(':');
326                                                        i += 2;
327                                                        break compare;
328                                                case 'c':
329                                                        buf.append('<');
330                                                        i += 2;
331                                                        break compare;
332                                                case 'e':
333                                                        buf.append('>');
334                                                        i += 2;
335                                                        break compare;
336                                                }
337                                                break;
338                                        case '4':
339                                                if (c3 == '0') {
340                                                        buf.append("@");
341                                                        i += 2;
342                                                        break compare;
343                                                }
344                                                break;
345                                        case '5':
346                                                if (c3 == 'c') {
347                                                        buf.append("\\");
348                                                        i += 2;
349                                                        break compare;
350                                                }
351                                                break;
352                                        }
353                                }
354                                buf.append(c);
355                        }
356                }
357                res = buf.toString();
358                LOCALPART_UNESCAPE_CACHE.put(localpart, res);
359                return res;
360        }
361
362        /**
363         * Construct a JID String from the given parts.
364         *
365         * @param localpart the localpart.
366         * @param domainpart the domainpart.
367         * @return the constructed JID String.
368         */
369        public static String completeJidFrom(CharSequence localpart, CharSequence domainpart) {
370                return completeJidFrom(localpart != null ? localpart.toString() : null, domainpart.toString());
371        }
372
373        /**
374         * Construct a JID String from the given parts.
375         *
376         * @param localpart the localpart.
377         * @param domainpart the domainpart.
378         * @return the constructed JID String.
379         */
380        public static String completeJidFrom(String localpart, String domainpart) {
381                return completeJidFrom(localpart, domainpart, null);
382        }
383
384        /**
385         * Construct a JID String from the given parts.
386         *
387         * @param localpart the localpart.
388         * @param domainpart the domainpart.
389         * @param resource the resourcepart.
390         * @return the constructed JID String.
391         */
392        public static String completeJidFrom(CharSequence localpart, CharSequence domainpart, CharSequence resource) {
393                return completeJidFrom(localpart != null ? localpart.toString() : null, domainpart.toString(),
394                                resource != null ? resource.toString() : null);
395        }
396
397        /**
398         * Construct a JID String from the given parts.
399         *
400         * @param localpart the localpart.
401         * @param domainpart the domainpart.
402         * @param resource the resourcepart.
403         * @return the constructed JID String.
404         */
405        public static String completeJidFrom(String localpart, String domainpart, String resource) {
406                if (domainpart == null) {
407                        throw new IllegalArgumentException("domainpart must not be null");
408                }
409                int localpartLength = localpart != null ? localpart.length() : 0;
410                int domainpartLength = domainpart.length();
411                int resourceLength = resource != null ? resource.length() : 0;
412                int maxResLength = localpartLength + domainpartLength + resourceLength + 2;
413                StringBuilder sb = new StringBuilder(maxResLength);
414                if (localpartLength > 0) {
415                        sb.append(localpart).append('@');
416                }
417                sb.append(domainpart);
418                if (resourceLength > 0) {
419                        sb.append('/').append(resource);
420                }
421                return sb.toString();
422        }
423
424        /**
425         * Generate a unique key from a element name and namespace. This key can be used to lookup element/namespace
426         * information. The key is simply generated by concatenating the strings as follows:
427         * <code>element + '\t' + namespace</code>.
428         * <p>
429         * The tab character (\t) was chosen because it will be normalized, i.e. replace by space, in attribute values. It
430         * therefore should never appear in <code>element</code> or <code>namespace</code>. For more information about the
431         * normalization, see the XML specification § <a href="http://www.w3.org/TR/REC-xml/#AVNormalize">3.3.3
432         * Attribute-Value Normalization</a>.
433         * </p>
434         * 
435         * @param element the element.
436         * @param namespace the namespace.
437         * @return the unique key of element and namespace.
438         */
439        public static String generateKey(String element, String namespace) {
440                return element + '\t' + namespace;
441        }
442}