Detecting if a character in a String is an emoticon (using Android)

Question 1

I was able to adapt the linked iOS code into the following function. What I hadn't realized is that a String containing a single emoticon can have a length of 2: Java strings are UTF-16, and characters above U+FFFF are stored as a surrogate pair (a high surrogate followed by a low surrogate). So you can check whether a character is a surrogate.

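I wasn't entirely sure how to handle else if (substring.length > 1) from the iOS code. That branch deals with a character that is followed by a combining mark such as U+20E3 (the enclosing keycap). Note that Character.isHighSurrogate(myChar) does not do the same job: it tests the same 0xD800–0xDBFF range as the first condition, so it cannot be used to detect that case.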

/**
 * Reports whether the given display name contains an emoji or emoji-like symbol.
 *
 * <p>The string is scanned one UTF-16 code unit at a time and three cases are detected:
 * <ul>
 *   <li>a valid surrogate pair whose code point lies in U+1D000..U+1F77F;</li>
 *   <li>any non-surrogate character immediately followed by U+20E3 (keycap sequence);</li>
 *   <li>a single BMP character from a fixed set of symbol ranges and code points.</li>
 * </ul>
 *
 * @param displayName the text to inspect; {@code null} is treated as containing nothing illegal
 * @return {@code true} if at least one matching character or sequence is found
 */
private boolean containsIllegalCharacters(String displayName)
{
    if (displayName == null)
    {
        return false;
    }

    final int nameLength = displayName.length();

    for (int i = 0; i < nameLength; i++)
    {
        final char hs = displayName.charAt(i);
        final boolean hasNext = i + 1 < nameLength;

        if (Character.isHighSurrogate(hs))
        {
            // Only decode a well-formed pair. A high surrogate at the end of the string, or one
            // not followed by a low surrogate, is malformed and is simply skipped rather than
            // read past the end of the string or decoded into a bogus code point.
            if (hasNext && Character.isLowSurrogate(displayName.charAt(i + 1)))
            {
                final int uc = Character.toCodePoint(hs, displayName.charAt(i + 1));

                if (0x1d000 <= uc && uc <= 0x1f77f)
                {
                    return true;
                }

                // The low surrogate belongs to this pair; do not examine it on its own.
                i++;
            }
        }
        else if (hasNext && displayName.charAt(i + 1) == 0x20e3)
        {
            // Base character followed by the combining enclosing keycap, e.g. "1" + U+20E3.
            return true;
        }
        else if ((0x2100 <= hs && hs <= 0x27ff)
                || (0x2b05 <= hs && hs <= 0x2b07)
                || (0x2934 <= hs && hs <= 0x2935)
                || (0x3297 <= hs && hs <= 0x3299)
                || hs == 0xa9 || hs == 0xae || hs == 0x303d || hs == 0x3030
                || hs == 0x2b55 || hs == 0x2b1c || hs == 0x2b1b || hs == 0x2b50)
        {
            // Single BMP symbol from the fixed ranges and code points listed above.
            return true;
        }
    }

    return false;
}

Question 2

Four years later...

At this time, it might make more sense to take advantage of EmojiCompat. This code presumes you initialized EmojiCompat when your app was starting up. The basic idea here is to have EmojiCompat process your CharSequence, inserting instances of EmojiSpan wherever any emoji appear, and then examine the results.

/**
 * Reports whether the given text contains at least one emoji recognised by EmojiCompat.
 *
 * <p>EmojiCompat must already be initialized before this is called. The text is processed
 * with {@link EmojiCompat#REPLACE_STRATEGY_ALL} and the result is inspected for
 * {@link EmojiSpan} instances.
 *
 * @param charSequence the text to inspect; {@code null} or empty yields {@code false}
 * @return {@code true} if processing attached at least one {@link EmojiSpan}
 */
public static boolean containsEmoji(CharSequence charSequence) {
    if (charSequence == null || charSequence.length() == 0) {
        return false;
    }
    // The end index is exclusive, so the full length must be passed; length() - 1 would
    // drop the last UTF-16 unit and miss an emoji at the end of the text.
    CharSequence processed = EmojiCompat.get().process(charSequence, 0, charSequence.length(), Integer.MAX_VALUE, EmojiCompat.REPLACE_STRATEGY_ALL);
    if (processed instanceof Spannable) {
        Spannable spannable = (Spannable) processed;
        return spannable.getSpans(0, spannable.length(), EmojiSpan.class).length > 0;
    }
    // No Spannable came back, so no spans were attached.
    return false;
}

If you want to collect a list of the unique emoji that appear within a given CharSequence, you could do something like this, iterating over the results of getSpans() and finding the start and end of each span to capture the emoji discovered by EmojiCompat:

/**
 * Collects the distinct emoji that EmojiCompat finds in the given text.
 *
 * <p>EmojiCompat must already be initialized before this is called. Each {@link EmojiSpan}
 * attached by processing is mapped back to the substring it covers; duplicates are removed
 * through a {@link HashSet}, so the order of the returned list is unspecified.
 *
 * @param charSequence the text to inspect; {@code null} or empty yields an empty list
 * @return a new mutable list of the unique emoji strings found, never {@code null}
 */
@NonNull
public static List<String> getUniqueEmoji(CharSequence charSequence) {
    Set<String> emojiList = new HashSet<>();
    if (charSequence == null || charSequence.length() == 0) {
        return new ArrayList<>(emojiList);
    }
    // The end index is exclusive, so the full length must be passed; length() - 1 would
    // drop the last UTF-16 unit and miss an emoji at the end of the text.
    CharSequence processed = EmojiCompat.get().process(charSequence, 0, charSequence.length(), Integer.MAX_VALUE, EmojiCompat.REPLACE_STRATEGY_ALL);
    if (processed instanceof Spannable) {
        Spannable spannable = (Spannable) processed;

        EmojiSpan[] emojiSpans = spannable.getSpans(0, spannable.length(), EmojiSpan.class);
        for (EmojiSpan emojiSpan : emojiSpans) {
            int spanStart = spannable.getSpanStart(emojiSpan);
            int spanEnd = spannable.getSpanEnd(emojiSpan);
            CharSequence emojiCharSequence = spannable.subSequence(spanStart, spanEnd);
            emojiList.add(String.valueOf(emojiCharSequence));
        }
    }
    // Copying an empty set already yields an empty list, so no special case is needed.
    return new ArrayList<>(emojiList);
}

UPDATE: Here's an example of EmojiCompat initialization. This static method can be called from your Application onCreate() method, passing in the Application itself as the Context param.

/**
 * Initializes EmojiCompat once, using the downloadable "Noto Color Emoji Compat" font
 * requested from the Google Play services font provider.
 *
 * If a configuration has already been stored, a warning is logged and nothing else happens.
 *
 * @param context context handed to the font-request configuration
 */
@JvmStatic
fun initEmojiCompat(context: Context) {
    if (emojiCompatConfig == null) {
        // "Noto Color Emoji Compat" doesn't have graphics for the following emojis:
        // U+1F5E3 "speaking head" (required)
        // U+1F441 "eye" (required)
        // U+1F575 "detective" (nice to have)
        val request = FontRequest(
            "com.google.android.gms.fonts",
            "com.google.android.gms",
            "Noto Color Emoji Compat",
            R.array.com_google_android_gms_fonts_certs
        )

        val config = FontRequestEmojiCompatConfig(context, request)
            .setReplaceAll(false)
            .setEmojiSpanIndicatorEnabled(false)
            .registerInitCallback(initCallback)

        // Initialize first, then remember the configuration so later calls are skipped.
        EmojiCompat.init(config)
        emojiCompatConfig = config
    } else {
        // alternatively, EmojiCompat.reset() could be called here
        logger().w(LOGTAG, "EmojiCompat already initialized.")
    }
}

Question 3

This is how Telegram does it:

/**
 * Precompiled pattern matching one or more consecutive emoji. Compiled once because
 * {@code String.matches} would recompile this large expression on every call.
 * The class name is fully qualified so that no additional import is required.
 */
private static final java.util.regex.Pattern EMOJI_PATTERN = java.util.regex.Pattern.compile(
        "(?:[\uD83C\uDF00-\uD83D\uDDFF]|[\uD83E\uDD00-\uD83E\uDDFF]|" +
        "[\uD83D\uDE00-\uD83D\uDE4F]|[\uD83D\uDE80-\uD83D\uDEFF]|" +
        "[\u2600-\u26FF]\uFE0F?|[\u2700-\u27BF]\uFE0F?|\u24C2\uFE0F?|" +
        "[\uD83C\uDDE6-\uD83C\uDDFF]{1,2}|" +
        "[\uD83C\uDD70\uD83C\uDD71\uD83C\uDD7E\uD83C\uDD7F\uD83C\uDD8E\uD83C\uDD91-\uD83C\uDD9A]\uFE0F?|" +
        "[\u0023\u002A\u0030-\u0039]\uFE0F?\u20E3|[\u2194-\u2199\u21A9-\u21AA]\uFE0F?|[\u2B05-\u2B07\u2B1B\u2B1C\u2B50\u2B55]\uFE0F?|" +
        "[\u2934\u2935]\uFE0F?|[\u3030\u303D]\uFE0F?|[\u3297\u3299]\uFE0F?|" +
        "[\uD83C\uDE01\uD83C\uDE02\uD83C\uDE1A\uD83C\uDE2F\uD83C\uDE32-\uD83C\uDE3A\uD83C\uDE50\uD83C\uDE51]\uFE0F?|" +
        "[\u203C\u2049]\uFE0F?|[\u25AA\u25AB\u25B6\u25C0\u25FB-\u25FE]\uFE0F?|" +
        "[\u00A9\u00AE]\uFE0F?|[\u2122\u2139]\uFE0F?|\uD83C\uDC04\uFE0F?|\uD83C\uDCCF\uFE0F?|" +
        "[\u231A\u231B\u2328\u23CF\u23E9-\u23F3\u23F8-\u23FA]\uFE0F?)+");

/**
 * Reports whether the whole message consists solely of one or more emoji.
 *
 * @param message the text to test; {@code null} yields {@code false}
 * @return {@code true} only if the entire message matches the emoji pattern
 */
private static boolean isEmoji(String message){
    return message != null && EMOJI_PATTERN.matcher(message).matches();
}

It is Line 21,026.

Question 4

Try this...

if (Integer.parseInt("1f600", 16) <= (int)'☺' && (int)'☺' <= Integer.parseInt("1f64f", 16)) {
    Print.d("Unicode", "groovy!");
}

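This compares plain ints: the hexadecimal bounds and the character value are both converted to int. Note, however, that a Java char is a single 16-bit UTF-16 code unit, so to test characters above U+FFFF (which includes the emoticon range U+1F600–U+1F64F) you must compare the full code point, for example the value returned by String.codePointAt(), rather than a single char.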