Skip to content

Commit

Permalink
Detect if grapheme is an emoji
Browse files Browse the repository at this point in the history
This adds the logic needed to detect whether a grapheme cluster is an
emoji, based on @rivo's comment on rivo#27.

Fixes: rivo#27
  • Loading branch information
aymanbagabas committed May 30, 2024
1 parent 087b3e4 commit 41a262e
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 0 deletions.
52 changes: 52 additions & 0 deletions grapheme.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,11 @@ func (g *Graphemes) Bytes() []byte {
return []byte(g.cluster)
}

// IsEmoji reports whether the current grapheme cluster is an emoji. It hands
// the cluster's bytes together with the cluster's Width() to
// [IsGraphemeClusterEmoji] and returns that function's verdict.
func (g *Graphemes) IsEmoji() bool {
	clusterBytes := []byte(g.cluster)
	return IsGraphemeClusterEmoji(clusterBytes, g.Width())
}

// Positions returns the interval of the current grapheme cluster as byte
// positions into the original string. The first returned value "from" indexes
// the first byte and the second returned value "to" indexes the first byte that
Expand Down Expand Up @@ -343,3 +348,50 @@ func FirstGraphemeClusterInString(str string, state int) (cluster, rest string,
}
}
}

// Inclusive code point bounds used by isGraphemeClusterEmoji to recognize a
// leading regional indicator symbol (regional indicators are the building
// blocks of flag emojis).
const (
// regionalIndicatorA is U+1F1E6, REGIONAL INDICATOR SYMBOL LETTER A (lower bound).
regionalIndicatorA = 0x1F1E6
// regionalIndicatorZ is U+1F1FF, REGIONAL INDICATOR SYMBOL LETTER Z (upper bound).
regionalIndicatorZ = 0x1F1FF
)

// IsGraphemeClusterEmoji reports whether the grapheme cluster held in the given
// byte slice is an emoji in the sense of Unicode Technical Standard #51,
// "Unicode Emoji". The width argument is the cluster's width (as returned by,
// for example, [Graphemes.Width]); a cluster whose width is not 2 is never
// reported as an emoji.
func IsGraphemeClusterEmoji(cluster []byte, width int) bool {
	decode := utf8.DecodeRune
	return isGraphemeClusterEmoji(cluster, decode, width)
}

// IsGraphemeClusterInStringEmoji is the string counterpart of
// [IsGraphemeClusterEmoji]: it takes the grapheme cluster as a string rather
// than a byte slice and otherwise applies the same checks to it and to width.
func IsGraphemeClusterInStringEmoji(cluster string, width int) bool {
	decode := utf8.DecodeRuneInString
	return isGraphemeClusterEmoji(cluster, decode, width)
}

// isGraphemeClusterEmoji is the shared implementation behind
// [IsGraphemeClusterEmoji] and [IsGraphemeClusterInStringEmoji]. The decoder fn
// must return the first rune of its argument together with that rune's length
// in bytes (utf8.DecodeRune and utf8.DecodeRuneInString are the decoders used
// by the exported wrappers).
//
// A cluster is reported as an emoji only if width is 2 and at least one of the
// following holds:
//   - its first rune equals vs16,
//   - its first rune lies in [regionalIndicatorA, regionalIndicatorZ],
//   - its first rune is classified as prExtendedPictographic by
//     propertyGraphemes and as prEmojiPresentation in the emojiPresentation
//     table,
//   - any later rune of the cluster equals vs16.
func isGraphemeClusterEmoji[C []byte | string, F func(C) (rune, int)](cluster C, fn F, width int) bool {
	// Anything that does not occupy two cells is rejected up front.
	if width != 2 {
		return false
	}

	// The first rune alone can settle the question.
	first, firstLen := fn(cluster)
	switch {
	case first == vs16:
		return true
	case regionalIndicatorA <= first && first <= regionalIndicatorZ:
		return true
	case propertyGraphemes(first) == prExtendedPictographic &&
		property(emojiPresentation, first) == prEmojiPresentation:
		return true
	}

	// Otherwise the cluster only counts as an emoji if vs16 shows up among the
	// remaining runes.
	rest := cluster[firstLen:]
	for len(rest) != 0 {
		next, size := fn(rest)
		if next == vs16 {
			return true
		}
		rest = rest[size:]
	}

	return false
}
22 changes: 22 additions & 0 deletions grapheme_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,28 @@ func TestGraphemesFunctionString(t *testing.T) {
}
}

// TestIsGraphemeClusterEmoji runs IsGraphemeClusterEmoji over a table of
// grapheme clusters (each paired with its width) and checks the verdict for
// emojis, flags, ZWJ sequences, and non-emoji characters of width 1 and 2.
func TestIsGraphemeClusterEmoji(t *testing.T) {
	type emojiCase struct {
		cluster  string
		width    int
		expected bool
	}
	cases := []emojiCase{
		{cluster: "👋", width: 2, expected: true},
		{cluster: "a", width: 1, expected: false},
		{cluster: "咪", width: 2, expected: false},
		{cluster: "ض", width: 1, expected: false},
		{cluster: "🇩🇪", width: 2, expected: true},
		{cluster: "👨🏿‍🌾", width: 2, expected: true},
		{cluster: "🏳️‍🌈", width: 2, expected: true},
		{cluster: "☺️", width: 2, expected: true}, // White smiling face (with variation selector 16 = emoji presentation)
	}
	for i, tc := range cases {
		got := IsGraphemeClusterEmoji([]byte(tc.cluster), tc.width)
		if got == tc.expected {
			continue
		}
		t.Errorf(`Test case %d %q failed: Expected %t, got %t`, i, tc.cluster, tc.expected, got)
	}
}

// Benchmark the use of the Graphemes class.
func BenchmarkGraphemesClass(b *testing.B) {
for i := 0; i < b.N; i++ {
Expand Down

0 comments on commit 41a262e

Please sign in to comment.