Skip to content

Commit

Permalink
Add EmojiManager#extractEmojisInOrderWithIndex
Browse files Browse the repository at this point in the history
  • Loading branch information
freya022 committed Nov 24, 2023
1 parent 6daf76e commit 335e13c
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 7 deletions.
55 changes: 51 additions & 4 deletions lib/src/main/java/net/fellbaum/jemoji/EmojiManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,7 @@
import java.util.stream.Collector;
import java.util.stream.Collectors;

import static net.fellbaum.jemoji.EmojiUtils.addColonToAlias;
import static net.fellbaum.jemoji.EmojiUtils.findEmojiByEitherAlias;
import static net.fellbaum.jemoji.EmojiUtils.isStringNullOrEmpty;
import static net.fellbaum.jemoji.EmojiUtils.removeColonFromAlias;
import static net.fellbaum.jemoji.EmojiUtils.*;

@SuppressWarnings("unused")
public final class EmojiManager {
Expand Down Expand Up @@ -308,6 +305,56 @@ public static List<Emoji> extractEmojisInOrder(final String text) {
return Collections.unmodifiableList(emojis);
}

/**
 * Finds every emoji contained in the given text, in order of appearance, and reports
 * where each one starts.
 *
 * <p>Each result carries two start positions: the UTF-16 character index (comparable to
 * {@link String#charAt(int)} positions) and the codepoint index (the position within
 * {@link String#codePoints()}).
 *
 * <p>At each position the candidate emojis registered for the current codepoint are tried
 * in the order the lookup table provides them, and the first full match wins. Matched
 * emojis are consumed entirely, so results never overlap.
 *
 * @param text The text to extract emojis from.
 * @return An unmodifiable list of indexed emojis, empty if the text is null or empty.
 */
public static List<IndexedEmoji> extractEmojisInOrderWithIndex(final String text) {
    if (isStringNullOrEmpty(text)) {
        return Collections.emptyList();
    }

    final int[] codePoints = text.codePoints().toArray();
    final int codePointCount = codePoints.length;
    final List<IndexedEmoji> found = new ArrayList<>();

    int charOffset = 0;
    int codePointOffset = 0;
    while (codePointOffset < codePointCount) {
        final int head = codePoints[codePointOffset];
        final List<Emoji> candidates = EMOJI_FIRST_CODEPOINT_TO_EMOJIS_ORDER_CODEPOINT_LENGTH_DESCENDING.get(head);

        Emoji matched = null;
        int matchedCodePointCount = 0;
        if (candidates != null) {
            for (final Emoji candidate : candidates) {
                final int[] candidateCodePoints = candidate.getEmoji().codePoints().toArray();
                final int candidateLength = candidateCodePoints.length;
                // The candidate would run past the end of the text.
                if (codePointOffset + candidateLength > codePointCount) {
                    continue;
                }

                // An empty candidate never counts as a match.
                boolean matches = candidateLength > 0;
                for (int k = 0; matches && k < candidateLength; k++) {
                    matches = codePoints[codePointOffset + k] == candidateCodePoints[k];
                }

                if (matches) {
                    matched = candidate;
                    matchedCodePointCount = candidateLength;
                    break;
                }
            }
        }

        if (matched == null) {
            // No emoji starts here: step over a single codepoint (one or two chars).
            charOffset += Character.charCount(head);
            codePointOffset++;
        } else {
            found.add(new IndexedEmoji(matched, charOffset, codePointOffset));
            // Skip the whole emoji in both coordinate systems.
            charOffset += matched.getEmoji().length();
            codePointOffset += matchedCodePointCount;
        }
    }

    return Collections.unmodifiableList(found);
}

/**
* Extracts all emojis from the given text.
*
Expand Down
51 changes: 51 additions & 0 deletions lib/src/main/java/net/fellbaum/jemoji/IndexedEmoji.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package net.fellbaum.jemoji;

/**
 * Represents an emoji found in a text, together with the character and codepoint indexes
 * at which it starts.
 *
 * <p>Instances are immutable. Two instances are equal when they are of the same class,
 * start at the same indexes and hold equal emojis (as decided by the emoji's own
 * {@code equals}).
 *
 * @see EmojiManager#extractEmojisInOrderWithIndex(String)
 */
public class IndexedEmoji {
    private final Emoji emoji;
    private final int charIndex;
    private final int codePointIndex;

    /**
     * Creates an indexed emoji.
     *
     * @param emoji          The captured emoji
     * @param charIndex      The UTF-16 character index at which the emoji starts
     * @param codePointIndex The codepoint index at which the emoji starts
     */
    IndexedEmoji(Emoji emoji, int charIndex, int codePointIndex) {
        this.emoji = emoji;
        this.charIndex = charIndex;
        this.codePointIndex = codePointIndex;
    }

    /**
     * Gets the captured {@link Emoji emoji}.
     *
     * @return The captured emoji
     */
    public Emoji getEmoji() {
        return emoji;
    }

    /**
     * Gets the character index at which the emoji starts.
     *
     * <p>The index is in the range {@code 0 <= index < input.length()},
     * i.e. it counts UTF-16 {@code char}s like {@link String#charAt(int)} does.
     *
     * @return The character index at which the emoji starts
     */
    public int getCharIndex() {
        return charIndex;
    }

    /**
     * Gets the codepoint index at which the emoji starts.
     *
     * <p>This must not be confused with {@link #getCharIndex()}:
     * a codepoint can consist of one or two characters,
     * which means the codepoint index is always lower than or equal to the character index.
     *
     * @return The codepoint index at which the emoji starts
     */
    public int getCodePointIndex() {
        return codePointIndex;
    }

    /**
     * Compares this indexed emoji with another object by value.
     *
     * @param o The object to compare with
     * @return {@code true} if {@code o} is of the same class and has an equal emoji and equal indexes
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        // The class is not final, so compare exact classes to keep equals symmetric.
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        final IndexedEmoji other = (IndexedEmoji) o;
        return charIndex == other.charIndex
                && codePointIndex == other.codePointIndex
                && java.util.Objects.equals(emoji, other.emoji);
    }

    /**
     * Computes a hash code consistent with {@link #equals(Object)}.
     *
     * @return The hash code of this indexed emoji
     */
    @Override
    public int hashCode() {
        int result = java.util.Objects.hashCode(emoji);
        result = 31 * result + charIndex;
        result = 31 * result + codePointIndex;
        return result;
    }

    /**
     * Returns a human-readable description containing the emoji and both indexes.
     *
     * @return A string representation of this indexed emoji
     */
    @Override
    public String toString() {
        return "IndexedEmoji{"
                + "emoji=" + emoji
                + ", charIndex=" + charIndex
                + ", codePointIndex=" + codePointIndex
                + '}';
    }
}
36 changes: 33 additions & 3 deletions lib/src/test/java/net/fellbaum/jemoji/EmojiManagerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,13 @@
import java.util.Set;
import java.util.stream.Collectors;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.*;

public class EmojiManagerTest {

// Every emoji returned by EmojiManager.getAllEmojisLengthDescending(), joined into one string with no separators.
public static final String ALL_EMOJIS_STRING = EmojiManager.getAllEmojisLengthDescending().stream().map(Emoji::getEmoji).collect(Collectors.joining());
// Short fixture mixing plain text with a few heart emojis.
private static final String SIMPLE_EMOJI_STRING = "Hello ❤️ ❤ ❤❤️ World";
// Fixture for the position checks: contains a multi-codepoint emoji, so character and codepoint indexes diverge after it.
private static final String SIMPLE_POSITION_EMOJI_STRING = "Hello ❤️ ❤ 👩🏻‍🤝‍👨🏼 ❤❤️ World";

@Test
public void extractEmojisInOrder() {
Expand All @@ -28,6 +27,37 @@ public void extractEmojisInOrder() {
assertEquals(allEmojis, emojis);
}

// Extracting from the all-emojis string repeated twice must yield every emoji, twice, in the original order.
@Test
public void extractEmojisInOrderWithIndex() {
    final List<Emoji> expected = new ArrayList<>(EmojiManager.getAllEmojisLengthDescending());
    expected.addAll(EmojiManager.getAllEmojisLengthDescending());

    final String doubledInput = ALL_EMOJIS_STRING + ALL_EMOJIS_STRING;
    final List<Emoji> actual = new ArrayList<>();
    for (final IndexedEmoji indexed : EmojiManager.extractEmojisInOrderWithIndex(doubledInput)) {
        actual.add(indexed.getEmoji());
    }

    assertEquals(EmojiManager.getAllEmojisLengthDescending().size() * 2, actual.size());
    assertEquals(expected, actual);
}

// Verifies the reported start positions of each emoji in the position fixture string.
@Test
public void extractEmojisInOrderWithIndexCheckPosition() {
    // One row per expected emoji: { character index, codepoint index }.
    final int[][] expectedPositions = {
            {6, 6},
            {9, 9},
            {11, 11},
            {24, 19},
            {25, 20},
    };

    final List<IndexedEmoji> extracted = EmojiManager.extractEmojisInOrderWithIndex(SIMPLE_POSITION_EMOJI_STRING);
    assertEquals(expectedPositions.length, extracted.size());

    for (int i = 0; i < expectedPositions.length; i++) {
        checkIndexedEmoji(extracted.get(i), expectedPositions[i][0], expectedPositions[i][1]);
    }
}

// Asserts that the given indexed emoji starts at the expected character index and codepoint index (checked in that order).
private void checkIndexedEmoji(IndexedEmoji indexedEmoji, int expectedCharIndex, int expectedCodePointIndex) {
    final int actualCharIndex = indexedEmoji.getCharIndex();
    assertEquals(expectedCharIndex, actualCharIndex);

    final int actualCodePointIndex = indexedEmoji.getCodePointIndex();
    assertEquals(expectedCodePointIndex, actualCodePointIndex);
}

@Test
public void extractEmojis() {
Set<Emoji> emojis = EmojiManager.extractEmojis(ALL_EMOJIS_STRING + ALL_EMOJIS_STRING);
Expand Down

0 comments on commit 335e13c

Please sign in to comment.