-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathngram.go
69 lines (58 loc) · 1.61 KB
/
ngram.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
package bag
import (
"bytes"
)
// toNGrams breaks the inbound string into words (via toWords) and returns the
// string form of every n-gram of the requested size, in encounter order.
//
// Each word is shifted into a single reusable nGram; once that nGram reports
// itself as full, its String() form is appended after every subsequent word.
// If the input ends before the nGram ever fills but at least one word was
// seen, the partial nGram is appended as the only entry.
//
// NOTE(review): size is handed to newCircularBuffer unchecked; presumably
// callers guarantee size > 0 - confirm.
func toNGrams(in string, size int) (ns []string) {
	n := nGram{circularBuffer: newCircularBuffer[string](size)}

	// Walk the inbound data word by word
	toWords(in, func(word string) {
		// Push the word into the sliding window
		n.Shift(word)
		if n.IsFull() {
			// Window is complete, record its current contents
			ns = append(ns, n.String())
		}
	})

	if n.IsFull() || n.IsZero() {
		// Either every window was already recorded inside the callback,
		// or no words were seen at all - nothing left to add
		return ns
	}

	// The window never filled but holds at least one word, record it as-is
	ns = append(ns, n.String())
	return ns
}
// nGram represents an N-Gram (variable sized).
// It embeds a *circularBuffer[string], so the buffer's fields and methods
// (such as s, Shift and ForEach as used in this file) are promoted onto nGram.
// The embedded pointer is nil in the zero value and must be set before use.
type nGram struct {
*circularBuffer[string]
}
// String renders the nGram as a single string: every value visited by ForEach
// is written in visiting order, with one space between consecutive values.
func (n *nGram) String() (out string) {
	// The zero value of bytes.Buffer is an empty buffer ready for use
	var buf bytes.Buffer

	// Visit every value held by the nGram
	n.ForEach(func(value string) (end bool) {
		if buf.Len() != 0 {
			// Something was already written, separate it from this value
			buf.WriteByte(' ')
		}

		buf.WriteString(value)
		// Never request early termination, every value must be written
		return false
	})

	out = buf.String()
	return out
}
// IsZero reports whether the nGram is considered empty, which is decided by
// the value in the first slot of the underlying buffer having zero length.
//
// NOTE(review): presumably the buffer populates slot 0 first and words are
// never empty strings - confirm against circularBuffer and toWords.
func (n nGram) IsZero() bool {
	// An unpopulated first slot means nothing has been stored
	first := n.s[0]
	return len(first) == 0
}
// IsFull reports whether the nGram is considered full, which is decided by
// the value in the last slot of the underlying buffer having non-zero length.
//
// NOTE(review): presumably the last slot is the final one to be populated and
// words are never empty strings - confirm against circularBuffer and toWords.
func (n nGram) IsFull() bool {
	// A populated last slot means every slot has been written
	lastIndex := len(n.s) - 1
	return len(n.s[lastIndex]) != 0
}