-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #20 from heussd/feature/keywords-file-in-the-cloud
Implement cloud-based keywords file which will be retrieved during op…
- Loading branch information
Showing
6 changed files
with
176 additions
and
70 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
package model | ||
|
||
import "github.com/dlclark/regexp2" | ||
|
||
type KeywordEntry struct { | ||
Regexp regexp2.Regexp | ||
Id string | ||
Text string | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
package cloudtextfile | ||
|
||
import ( | ||
"fmt" | ||
"io" | ||
"net/http" | ||
"strings" | ||
"time" | ||
|
||
"github.com/dlclark/regexp2" | ||
"github.com/heussd/nats-news-keyword-matcher.go/internal/config" | ||
"github.com/heussd/nats-news-keyword-matcher.go/internal/model" | ||
) | ||
|
||
var ( | ||
cachedKeywords []model.KeywordEntry | ||
lastGenerated time.Time | ||
cacheDuration = 20 * time.Minute | ||
) | ||
|
||
func RetrieveKeywordsFile() (keywords []string, err error) { | ||
client := &http.Client{} | ||
var req *http.Request | ||
|
||
if req, err = http.NewRequest("GET", config.KeywordsFileUrl, nil); err != nil { | ||
return nil, err | ||
} | ||
|
||
var response *http.Response | ||
|
||
if response, err = client.Do(req); err != nil { | ||
return nil, err | ||
} | ||
|
||
if status := response.StatusCode; status != 200 { | ||
return nil, fmt.Errorf("failed to retrieve keywords file: status code %d", status) | ||
} | ||
|
||
var body []byte | ||
if body, err = io.ReadAll(response.Body); err != nil { | ||
return nil, err | ||
} | ||
|
||
keywords = strings.Split(string(body), "\n") | ||
|
||
return keywords, nil | ||
} | ||
|
||
func humanReadable(regex string) string { | ||
var s = regex | ||
var err error | ||
for _, r := range cleanUpRegexes { | ||
if s, err = r.Replace(s, " ", 0, -1); err != nil { | ||
panic(err) | ||
} | ||
} | ||
|
||
return strings.TrimSpace(s) | ||
} | ||
|
||
var cleanUpRegexes = []regexp2.Regexp{ | ||
*regexp2.MustCompile("[^a-zA-Z]", 0), | ||
*regexp2.MustCompile("\\s\\S\\s", 0), | ||
*regexp2.MustCompile("\\s\\s+", 0), | ||
} | ||
|
||
func CachedParsedKeywords() (keywords []model.KeywordEntry, err error) { | ||
if time.Since(lastGenerated) > cacheDuration { | ||
|
||
keywords = []model.KeywordEntry{} | ||
|
||
var plainKeywords []string | ||
if plainKeywords, err = RetrieveKeywordsFile(); err != nil { | ||
return nil, err | ||
} | ||
|
||
for _, text := range plainKeywords { | ||
if text == "" || strings.HasPrefix(text, "#") { | ||
continue | ||
} | ||
|
||
fmt.Printf("Parsing \"%s\" as regex\n", text) | ||
|
||
var regex = regexp2.MustCompile(text, 0) | ||
keywords = append(keywords, model.KeywordEntry{ | ||
Regexp: *regex, | ||
Id: humanReadable(text), | ||
Text: text, | ||
}) | ||
} | ||
cachedKeywords = keywords | ||
lastGenerated = time.Now() | ||
} | ||
|
||
return cachedKeywords, nil | ||
} |
56 changes: 56 additions & 0 deletions
56
keyword-matcher-go/pkg/cloudtextfile/cloudtextfile_test.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
package cloudtextfile | ||
|
||
import ( | ||
"testing" | ||
"time" | ||
|
||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
func Test(t *testing.T) { | ||
var keywords []string | ||
var err error | ||
if keywords, err = RetrieveKeywordsFile(); err != nil { | ||
t.Error(err) | ||
} | ||
|
||
assert.Equal(t, | ||
12, | ||
len(keywords)) | ||
|
||
assert.Equal(t, | ||
"(?i)\\b(Apple|peach)", | ||
keywords[0]) | ||
} | ||
|
||
func TestCache(t *testing.T) { | ||
// First call to populate the cache | ||
keywords, err := CachedParsedKeywords() | ||
if err != nil { | ||
t.Error(err) | ||
} | ||
|
||
assert.NotNil(t, keywords) | ||
assert.Greater(t, len(keywords), 0) | ||
|
||
// Store the time of the first cache generation | ||
firstGenerated := lastGenerated | ||
|
||
// Wait for a short duration and call again | ||
time.Sleep(1 * time.Second) | ||
keywords, err = CachedParsedKeywords() | ||
if err != nil { | ||
t.Error(err) | ||
} | ||
|
||
// Ensure the cache is still valid and hasn't been regenerated | ||
assert.Equal(t, firstGenerated, lastGenerated) | ||
assert.NotNil(t, keywords) | ||
assert.Greater(t, len(keywords), 0) | ||
|
||
} | ||
|
||
func TestHumanReadable(t *testing.T) { | ||
assert.Equal(t, "delicious pie recipes", humanReadable("(?i)(delicious).*(pie|recipes)")) | ||
|
||
} |