001/**
002 *
003 * Copyright © 2014-2024 Florian Schmaus
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.jxmpp.util;
018
019import org.jxmpp.util.cache.LruCache;
020
021/**
022 * Utility class for handling Strings in XMPP.
023 */
024public class XmppStringUtils {
025
026        /**
027         * Returns the localpart of an XMPP address (JID). For example, for the address "user@xmpp.org/Resource", "user"
028         * would be returned. Returns <code>null</code> if the given JID has no localpart. Returns the empty string if
029         * the given JIDs localpart is the empty string (which is invalid).
030         * 
031         * @param jid
032         *            the XMPP address to parse.
033         * @return the name portion of the XMPP address, the empty String or <code>null</code>.
034         */
035        public static String parseLocalpart(String jid) {
036                int atIndex = jid.indexOf('@');
037                if (atIndex < 0) {
038                        return null;
039                }
040                if (atIndex == 0) {
041                        // '@' as first character, i.e. '@example.org". Return empty string as
042                        // localpart, to make it possible to differentiate this from 'example.org'
043                        // (which would return 'null' as localpart).
044                        return "";
045                }
046
047                int slashIndex = jid.indexOf('/');
048                if (slashIndex >= 0 && slashIndex < atIndex) {
049                        // This is an '@' character in the resourcepart.
050                        return null;
051                }
052
053                return jid.substring(0, atIndex);
054        }
055
056        /**
057         * Returns the domain of an XMPP address (JID). For example, for the address "user@xmpp.org/Resource", "xmpp.org"
058         * would be returned. If <code>jid</code> is <code>null</code>, then this method returns also <code>null</code>. If
059         * the input String is no valid JID or has no domainpart, then this method will return the empty String.
060         * 
061         * @param jid
062         *            the XMPP address to parse.
063         * @return the domainpart of the XMPP address, the empty String or <code>null</code>.
064         */
065        public static String parseDomain(String jid) {
066                if (jid == null) return null;
067
068                int atIndex = jid.indexOf('@');
069                int slashIndex = jid.indexOf('/');
070                if (slashIndex >= 0) {
071                        // 'local@domain.foo/resource' and 'local@domain.foo/res@otherres' case
072                        if (slashIndex > atIndex) {
073                                return jid.substring(atIndex + 1, slashIndex);
074                        // 'domain.foo/res@otherres' case
075                        } else {
076                                return jid.substring(0, slashIndex);
077                        }
078                } else {
079                        return jid.substring(atIndex + 1);
080                }
081        }
082
083        /**
084         * Returns the resource portion of an XMPP address (JID). For example, for the address "user@xmpp.org/Resource",
085         * "Resource" would be returned. Returns <code>null</code> if the given JID has no resourcepart. Returns the
086         * empty string if the given JID has an empty resourcepart (which is invalid).
087         * 
088         * @param jid
089         *            the XMPP address to parse.
090         * @return the resource portion of the XMPP address.
091         */
092        public static String parseResource(String jid) {
093                int slashIndex = jid.indexOf('/');
094                if (slashIndex < 0) {
095                        return null;
096                }
097                if (slashIndex + 1 > jid.length()) {
098                        return "";
099                } else {
100                        return jid.substring(slashIndex + 1);
101                }
102        }
103
104        /**
105         * Returns the JID with any resource information removed. For example, for
106         * the address "matt@jivesoftware.com/Smack", "matt@jivesoftware.com" would
107         * be returned.
108         * 
109         * @param jid
110         *            the XMPP JID.
111         * @return the bare XMPP JID without resource information.
112         */
113        public static String parseBareJid(String jid) {
114                int slashIndex = jid.indexOf('/');
115                if (slashIndex < 0) {
116                        return jid;
117                } else if (slashIndex == 0) {
118                        return "";
119                } else {
120                        return jid.substring(0, slashIndex);
121                }
122        }
123
124        /**
125         * Returns true if jid is a full JID (i.e. a JID with resource part).
126         * 
127         * @param jid the String to check.
128         * @return true if full JID, false otherwise
129         */
130        public static boolean isFullJID(String jid) {
131                String domain = parseDomain(jid);
132                String resource = parseResource(jid);
133                if ((domain == null || domain.length() <= 0)
134                                || (resource == null || resource.length() <= 0)) {
135                        return false;
136                }
137                return true;
138        }
139
140        /**
141         * Returns true if <code>jid</code> is a bare JID ("foo@bar.com").
142         * <p>
143         * This method may return true for Strings that are not valid JIDs (e.g. because of Stringprep violations). Consider
144         * using <code>org.jxmpp.jid.util.JidUtil.validateBareJid(String)</code> from jxmpp-jid instead of this method as it
145         * exceptions provide a meaningful message string why the JID is not a bare JID and will also check for Stringprep
146         * errors.
147         * </p>
148         *
149         * @param jid the String to check.
150         * @return true if bare JID, false otherwise
151         */
152        public static boolean isBareJid(String jid) {
153                String domain = parseDomain(jid);
154                String resource = parseResource(jid);
155                return (domain != null && domain.length() > 0
156                                && (resource == null || resource.length() == 0));
157        }
158
159        private static final LruCache<String, String> LOCALPART_ESCAPE_CACHE = new LruCache<String, String>(100);
160        private static final LruCache<String, String> LOCALPART_UNESCAPE_CACHE = new LruCache<String, String>(100);
161
162        /**
163         * Escapes the localpart of a JID according to "JID Escaping" (XEP-0106).
164         * Escaping replaces characters prohibited by Nodeprep with escape sequences,
165         * as follows:
166         * <table border="1">
167         * <caption>Character mappings</caption>
168         * <tr><td><b>Unescaped Character</b></td><td><b>Encoded Sequence</b></td></tr>
169         * <tr><td>&lt;space&gt;</td><td>\20</td></tr>
170         * <tr><td>"</td><td>\22</td></tr>
171         * <tr><td>&amp;</td><td>\26</td></tr>
172         * <tr><td>'</td><td>\27</td></tr>
173         * <tr><td>/</td><td>\2f</td></tr>
174         * <tr><td>:</td><td>\3a</td></tr>
175         * <tr><td>&lt;</td><td>\3c</td></tr>
176         * <tr><td>&gt;</td><td>\3e</td></tr>
177         * <tr><td>@</td><td>\40</td></tr>
178         * <tr><td>\</td><td>\5c</td></tr>
179         * </table>
180         *
181         * <p>
182         * This process is useful when the localpart comes from an external source that doesn't
183         * conform to Nodeprep. For example, a username in LDAP may be "Joe Smith". Because
184         * the &lt;space&gt; character isn't a valid part of a localpart, the username should
185         * be escaped to "Joe\20Smith" before being made into a JID (e.g. "joe\20smith@example.com"
186         * after case-folding, etc. has been applied).
187         * </p>
188         *
189         * All localpart escaping and un-escaping must be performed manually at the appropriate
190         * time; the JID class will not escape or un-escape automatically.
191         *
192         * @param localpart the localpart.
193         * @return the escaped version of the localpart.
194         * @see <a href="http://xmpp.org/extensions/xep-0106.html">XEP-106: JID Escaping</a>
195         */
196        public static String escapeLocalpart(String localpart) {
197                if (localpart == null) {
198                        return null;
199                }
200                String res = LOCALPART_ESCAPE_CACHE.lookup(localpart);
201                if (res != null) {
202                        return res;
203                }
204                StringBuilder buf = new StringBuilder(localpart.length() + 8);
205                for (int i = 0, n = localpart.length(); i < n; i++) {
206                        char c = localpart.charAt(i);
207                        switch (c) {
208                        case '"':
209                                buf.append("\\22");
210                                break;
211                        case '&':
212                                buf.append("\\26");
213                                break;
214                        case '\'':
215                                buf.append("\\27");
216                                break;
217                        case '/':
218                                buf.append("\\2f");
219                                break;
220                        case ':':
221                                buf.append("\\3a");
222                                break;
223                        case '<':
224                                buf.append("\\3c");
225                                break;
226                        case '>':
227                                buf.append("\\3e");
228                                break;
229                        case '@':
230                                buf.append("\\40");
231                                break;
232                        case '\\':
233                                buf.append("\\5c");
234                                break;
235                        default: {
236                                if (Character.isWhitespace(c)) {
237                                        buf.append("\\20");
238                                } else {
239                                        buf.append(c);
240                                }
241                        }
242                        }
243                }
244                res = buf.toString();
245                LOCALPART_ESCAPE_CACHE.put(localpart, res);
246                return res;
247        }
248
249        /**
250         * Un-escapes the localpart of a JID according to "JID Escaping" (XEP-0106).
251         * Escaping replaces characters prohibited by Nodeprep with escape sequences,
252         * as follows:
253         * 
254         * <table border="1">
255         * <caption>Character mapping</caption>
256         * <tr><td><b>Unescaped Character</b></td><td><b>Encoded Sequence</b></td></tr>
257         * <tr><td>&lt;space&gt;</td><td>\20</td></tr>
258         * <tr><td>"</td><td>\22</td></tr>
259         * <tr><td>&amp;</td><td>\26</td></tr>
260         * <tr><td>'</td><td>\27</td></tr>
261         * <tr><td>/</td><td>\2f</td></tr>
262         * <tr><td>:</td><td>\3a</td></tr>
263         * <tr><td>&lt;</td><td>\3c</td></tr>
264         * <tr><td>&gt;</td><td>\3e</td></tr>
265         * <tr><td>@</td><td>\40</td></tr>
266         * <tr><td>\</td><td>\5c</td></tr>
267         * </table>
268         *
269         * <p>
270         * This process is useful when the localpart comes from an external source that doesn't
271         * conform to Nodeprep. For example, a username in LDAP may be "Joe Smith". Because
272         * the &lt;space&gt; character isn't a valid part of a localpart, the username should
273         * be escaped to "Joe\20Smith" before being made into a JID (e.g. "joe\20smith@example.com"
274         * after case-folding, etc. has been applied).
275         * </p>
276         *
277         * All localpart escaping and un-escaping must be performed manually at the appropriate
278         * time; the JID class will not escape or un-escape automatically.
279         *
280         * @param localpart the escaped version of the localpart.
281         * @return the un-escaped version of the localpart.
282         * @see <a href="http://xmpp.org/extensions/xep-0106.html">XEP-106: JID Escaping</a>
283         */
284        @SuppressWarnings("LabelledBreakTarget")
285        public static String unescapeLocalpart(String localpart) {
286                if (localpart == null) {
287                        return null;
288                }
289                String res = LOCALPART_UNESCAPE_CACHE.lookup(localpart);
290                if (res != null) {
291                        return res;
292                }
293                char[] localpartChars = localpart.toCharArray();
294                StringBuilder buf = new StringBuilder(localpartChars.length);
295                for (int i = 0, n = localpartChars.length; i < n; i++) {
296                        compare: {
297                                char c = localpart.charAt(i);
298                                if (c == '\\' && i + 2 < n) {
299                                        char c2 = localpartChars[i + 1];
300                                        char c3 = localpartChars[i + 2];
301                                        switch(c2) {
302                                        case '2':
303                                                switch (c3) {
304                                                case '0':
305                                                        buf.append(' ');
306                                                        i += 2;
307                                                        break compare;
308                                                case '2':
309                                                        buf.append('"');
310                                                        i += 2;
311                                                        break compare;
312                                                case '6':
313                                                        buf.append('&');
314                                                        i += 2;
315                                                        break compare;
316                                                case '7':
317                                                        buf.append('\'');
318                                                        i += 2;
319                                                        break compare;
320                                                case 'f':
321                                                        buf.append('/');
322                                                        i += 2;
323                                                        break compare;
324                                                }
325                                                break;
326                                        case '3':
327                                                switch (c3) {
328                                                case 'a':
329                                                        buf.append(':');
330                                                        i += 2;
331                                                        break compare;
332                                                case 'c':
333                                                        buf.append('<');
334                                                        i += 2;
335                                                        break compare;
336                                                case 'e':
337                                                        buf.append('>');
338                                                        i += 2;
339                                                        break compare;
340                                                }
341                                                break;
342                                        case '4':
343                                                if (c3 == '0') {
344                                                        buf.append("@");
345                                                        i += 2;
346                                                        break compare;
347                                                }
348                                                break;
349                                        case '5':
350                                                if (c3 == 'c') {
351                                                        buf.append("\\");
352                                                        i += 2;
353                                                        break compare;
354                                                }
355                                                break;
356                                        }
357                                }
358                                buf.append(c);
359                        }
360                }
361                res = buf.toString();
362                LOCALPART_UNESCAPE_CACHE.put(localpart, res);
363                return res;
364        }
365
366        /**
367         * Construct a JID String from the given parts.
368         *
369         * @param localpart the localpart.
370         * @param domainpart the domainpart.
371         * @return the constructed JID String.
372         */
373        public static String completeJidFrom(CharSequence localpart, CharSequence domainpart) {
374                return completeJidFrom(localpart != null ? localpart.toString() : null, domainpart.toString());
375        }
376
377        /**
378         * Construct a JID String from the given parts.
379         *
380         * @param localpart the localpart.
381         * @param domainpart the domainpart.
382         * @return the constructed JID String.
383         */
384        public static String completeJidFrom(String localpart, String domainpart) {
385                return completeJidFrom(localpart, domainpart, null);
386        }
387
388        /**
389         * Construct a JID String from the given parts.
390         *
391         * @param localpart the localpart.
392         * @param domainpart the domainpart.
393         * @param resource the resourcepart.
394         * @return the constructed JID String.
395         */
396        public static String completeJidFrom(CharSequence localpart, CharSequence domainpart, CharSequence resource) {
397                return completeJidFrom(localpart != null ? localpart.toString() : null, domainpart.toString(),
398                                resource != null ? resource.toString() : null);
399        }
400
401        /**
402         * Construct a JID String from the given parts.
403         *
404         * @param localpart the localpart.
405         * @param domainpart the domainpart.
406         * @param resource the resourcepart.
407         * @return the constructed JID String.
408         */
409        public static String completeJidFrom(String localpart, String domainpart, String resource) {
410                if (domainpart == null) {
411                        throw new IllegalArgumentException("domainpart must not be null");
412                }
413
414                int maxResLength = domainpart.length();
415                if (localpart != null) maxResLength += localpart.length() + 1;
416                if (resource != null) maxResLength += resource.length() + 1;
417
418                StringBuilder sb = new StringBuilder(maxResLength);
419                if (localpart != null) {
420                        sb.append(localpart).append('@');
421                }
422                sb.append(domainpart);
423                if (resource != null) {
424                        sb.append('/').append(resource);
425                }
426                return sb.toString();
427        }
428
429        /**
430         * Generate a unique key from a element name and namespace. This key can be used to lookup element/namespace
431         * information. The key is simply generated by concatenating the strings as follows:
432         * <code>element + '\t' + namespace</code>.
433         * <p>
434         * The tab character (\t) was chosen because it will be normalized, i.e. replace by space, in attribute values. It
435         * therefore should never appear in <code>element</code> or <code>namespace</code>. For more information about the
436         * normalization, see the XML specification § <a href="http://www.w3.org/TR/REC-xml/#AVNormalize">3.3.3
437         * Attribute-Value Normalization</a>.
438         * </p>
439         * 
440         * @param element the element.
441         * @param namespace the namespace.
442         * @return the unique key of element and namespace.
443         */
444        public static String generateKey(String element, String namespace) {
445                return element + '\t' + namespace;
446        }
447}