diff --git a/lib/src/main/java/net/fellbaum/jemoji/EmojiManager.java b/lib/src/main/java/net/fellbaum/jemoji/EmojiManager.java index 8181e6c..5919815 100644 --- a/lib/src/main/java/net/fellbaum/jemoji/EmojiManager.java +++ b/lib/src/main/java/net/fellbaum/jemoji/EmojiManager.java @@ -15,10 +15,7 @@ import java.util.stream.Collector; import java.util.stream.Collectors; -import static net.fellbaum.jemoji.EmojiUtils.addColonToAlias; -import static net.fellbaum.jemoji.EmojiUtils.findEmojiByEitherAlias; -import static net.fellbaum.jemoji.EmojiUtils.isStringNullOrEmpty; -import static net.fellbaum.jemoji.EmojiUtils.removeColonFromAlias; +import static net.fellbaum.jemoji.EmojiUtils.*; @SuppressWarnings("unused") public final class EmojiManager { @@ -308,6 +305,56 @@ public static List extractEmojisInOrder(final String text) { return Collections.unmodifiableList(emojis); } + /** + * Extracts all emojis from the given text in the order they appear. + * + * @param text The text to extract emojis from. + * @return A list of indexed emojis. 
+ */ + public static List<IndexedEmoji> extractEmojisInOrderWithIndex(final String text) { + if (isStringNullOrEmpty(text)) return Collections.emptyList(); + + final List<IndexedEmoji> emojis = new ArrayList<>(); + + final int[] textCodePointsArray = text.codePoints().toArray(); + final int textCodePointsLength = textCodePointsArray.length; + + int charIndex = 0; /* UTF-16 offset of the code point at textIndex */ + nextTextIteration: + for (int textIndex = 0; textIndex < textCodePointsLength; textIndex++) { + final int currentCodepoint = textCodePointsArray[textIndex]; + final List<Emoji> emojisByCodePoint = EMOJI_FIRST_CODEPOINT_TO_EMOJIS_ORDER_CODEPOINT_LENGTH_DESCENDING.get(currentCodepoint); + if (emojisByCodePoint == null) { + charIndex += Character.charCount(currentCodepoint); + continue; + } + for (final Emoji emoji : emojisByCodePoint) { + final int[] emojiCodePointsArray = emoji.getEmoji().codePoints().toArray(); + final int emojiCodePointsLength = emojiCodePointsArray.length; + // Skip candidates that would run past the end of the text + if ((textIndex + emojiCodePointsLength) > textCodePointsLength) { + continue; + } + + for (int emojiCodePointIndex = 0; emojiCodePointIndex < emojiCodePointsLength; emojiCodePointIndex++) { + if (textCodePointsArray[textIndex + emojiCodePointIndex] != emojiCodePointsArray[emojiCodePointIndex]) { + break; + } + if (emojiCodePointIndex == (emojiCodePointsLength - 1)) { + emojis.add(new IndexedEmoji(emoji, charIndex, textIndex)); + textIndex += emojiCodePointsLength - 1; /* the loop increment steps over the final code point */ + charIndex += emoji.getEmoji().length(); + continue nextTextIteration; + } + } + } + + charIndex += Character.charCount(currentCodepoint); + } + + return Collections.unmodifiableList(emojis); + } + /** + * Extracts all emojis from the given text. 
* diff --git a/lib/src/main/java/net/fellbaum/jemoji/IndexedEmoji.java b/lib/src/main/java/net/fellbaum/jemoji/IndexedEmoji.java new file mode 100644 index 0000000..28f4a18 --- /dev/null +++ b/lib/src/main/java/net/fellbaum/jemoji/IndexedEmoji.java @@ -0,0 +1,51 @@ +package net.fellbaum.jemoji; + +/** + * Represents an emoji found in a text, together with the character and codepoint indexes at which it starts. + * + * @see EmojiManager#extractEmojisInOrderWithIndex(String) + */ +public class IndexedEmoji { + private final Emoji emoji; + private final int charIndex; + private final int codePointIndex; + + IndexedEmoji(Emoji emoji, int charIndex, int codePointIndex) { + this.emoji = emoji; + this.charIndex = charIndex; + this.codePointIndex = codePointIndex; + } + + /** + * Gets the captured {@link Emoji emoji}. + * + * @return The captured emoji + */ + public Emoji getEmoji() { + return emoji; + } + + /** + * Gets the character (UTF-16 {@code char}) index at which the emoji starts. + * + *

The index satisfies {@code 0 <= index < input.length()}, where {@code input} is the text the emoji was extracted from. + * + * @return The character index at which the emoji starts + */ + public int getCharIndex() { + return charIndex; + } + + /** + * Gets the codepoint index at which the emoji starts. + * + *

This must not be confused with {@link #getCharIndex()}: + * a codepoint can occupy one or two characters, + * which means the codepoint index is never greater than the character index. + * + * @return The codepoint index at which the emoji starts + */ + public int getCodePointIndex() { + return codePointIndex; + } +} diff --git a/lib/src/test/java/net/fellbaum/jemoji/EmojiManagerTest.java b/lib/src/test/java/net/fellbaum/jemoji/EmojiManagerTest.java index 3c818f9..8169e18 100644 --- a/lib/src/test/java/net/fellbaum/jemoji/EmojiManagerTest.java +++ b/lib/src/test/java/net/fellbaum/jemoji/EmojiManagerTest.java @@ -8,14 +8,13 @@ import java.util.Set; import java.util.stream.Collectors; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.*; public class EmojiManagerTest { public static final String ALL_EMOJIS_STRING = EmojiManager.getAllEmojisLengthDescending().stream().map(Emoji::getEmoji).collect(Collectors.joining()); private static final String SIMPLE_EMOJI_STRING = "Hello ❤️ ❤ ❤❤️ World"; + private static final String SIMPLE_POSITION_EMOJI_STRING = "Hello ❤️ ❤ 👩🏻‍🤝‍👨🏼 ❤❤️ World"; @Test public void extractEmojisInOrder() { @@ -28,6 +27,37 @@ public void extractEmojisInOrder() { assertEquals(allEmojis, emojis); } + @Test + public void extractEmojisInOrderWithIndex() { + List emojis = EmojiManager.extractEmojisInOrderWithIndex(ALL_EMOJIS_STRING + ALL_EMOJIS_STRING) + .stream() + .map(IndexedEmoji::getEmoji) + .collect(Collectors.toList()); + + assertEquals(EmojiManager.getAllEmojisLengthDescending().size() * 2, emojis.size()); + List allEmojis = new ArrayList<>(EmojiManager.getAllEmojisLengthDescending()); + allEmojis.addAll(EmojiManager.getAllEmojisLengthDescending()); + + assertEquals(allEmojis, emojis); + } + + @Test + public void extractEmojisInOrderWithIndexCheckPosition() { + List emojis = 
EmojiManager.extractEmojisInOrderWithIndex(SIMPLE_POSITION_EMOJI_STRING); + assertEquals(5, emojis.size()); + + checkIndexedEmoji(emojis.get(0), 6, 6); /* before any two-char codepoint, both indexes agree */ + checkIndexedEmoji(emojis.get(1), 9, 9); + checkIndexedEmoji(emojis.get(2), 11, 11); /* this emoji spans 12 chars but only 7 codepoints */ + checkIndexedEmoji(emojis.get(3), 24, 19); /* so from here on the char index runs ahead */ + checkIndexedEmoji(emojis.get(4), 25, 20); + } + + private void checkIndexedEmoji(IndexedEmoji indexedEmoji, int expectedCharIndex, int expectedCodePointIndex) { + assertEquals(expectedCharIndex, indexedEmoji.getCharIndex()); + assertEquals(expectedCodePointIndex, indexedEmoji.getCodePointIndex()); + } + @Test public void extractEmojis() { Set emojis = EmojiManager.extractEmojis(ALL_EMOJIS_STRING + ALL_EMOJIS_STRING);