Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feed options + tests #54

Merged
merged 2 commits into from
Aug 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/scrape-feed/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ func main() {
flags.Usage()
os.Exit(1)
}
feedFetcher := feed.NewFeedFetcher(feed.DefaultOptions)
feedFetcher := feed.MustFeedFetcher()
resource, err := feedFetcher.Fetch(feedUrl)
if err != nil {
slog.Error("Error fetching", "url", feedUrl, "err", err)
Expand Down
2 changes: 1 addition & 1 deletion database/sqlite/migrations/00001_init.sql
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ CREATE TABLE IF NOT EXISTS urls (
parsed_url TEXT NOT NULL,
fetch_time INTEGER DEFAULT (unixepoch() ),
fetch_method INTEGER NOT NULL DEFAULT 0,
expires INTEGER DEFAULT (unixepoch() + 86400),
expires INTEGER DEFAULT (unixepoch() + (86400 * 30)),
metadata TEXT,
content_text TEXT
)
Expand Down
29 changes: 29 additions & 0 deletions database/sqlite/migrations/00003_feed_refresh.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
-- This migration adds a feed_refresh table to the database.
-- +goose Up
-- +goose StatementBegin

-- feed_refresh tracks per-feed refresh bookkeeping. The primary key's
-- ON CONFLICT REPLACE turns plain INSERTs into upserts, and
-- COLLATE NOCASE collapses URLs differing only in letter case into one
-- row. Time columns hold unix epoch seconds; intervals are in seconds.
CREATE TABLE IF NOT EXISTS feed_refresh (
url TEXT PRIMARY KEY NOT NULL ON CONFLICT REPLACE COLLATE NOCASE,
-- when this feed was last requested (defaults to now)
last_request INTEGER NOT NULL DEFAULT (unixepoch() ),
-- seconds between refreshes (default 12 hours)
refresh_interval INTEGER NOT NULL DEFAULT (3600 * 12),
-- when this feed was last refreshed; 0 means never refreshed
last_refresh INTEGER NOT NULL DEFAULT 0,
-- seconds of inactivity before the feed is considered idle (default 7 days)
idle_timeout INTEGER NOT NULL DEFAULT (86400 * 7)
)
STRICT;

-- NOTE(review): a PRIMARY KEY already has an implicit unique index in
-- SQLite, so this url index looks redundant — confirm before keeping.
CREATE INDEX IF NOT EXISTS feed_refresh_url_index ON feed_refresh (
url ASC
);

-- Supports the refresh scheduler's scan ordered by staleness.
CREATE INDEX IF NOT EXISTS feed_refresh_time_index ON feed_refresh (
last_refresh ASC,
refresh_interval ASC,
url ASC
);

-- +goose StatementEnd

-- +goose Down
-- +goose StatementBegin
DROP TABLE IF EXISTS feed_refresh;
-- +goose StatementEnd
63 changes: 51 additions & 12 deletions fetch/feed/feed.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,43 @@ const (
DefaultTimeout = 30 * time.Second
)

// option is a functional option that mutates the fetcher's config,
// returning an error when the supplied value is unusable.
type option func(*config) error

// WithUserAgent sets the User-Agent header used for feed requests.
// An empty string is rejected.
func WithUserAgent(ua string) option {
	return func(cfg *config) error {
		if len(ua) == 0 {
			return errors.New("user agent must not be empty")
		}
		cfg.UserAgent = ua
		return nil
	}
}

// WithTimeout sets the per-fetch timeout. Durations of zero or less
// are rejected.
func WithTimeout(t time.Duration) option {
	return func(cfg *config) error {
		if !(t > 0) {
			return errors.New("timeout must be positive")
		}
		cfg.Timeout = t
		return nil
	}
}

// WithClient sets the HTTP client used for feed requests. Any value,
// including nil, is accepted as-is.
func WithClient(client *http.Client) option {
	return func(cfg *config) error {
		cfg.Client = client
		return nil
	}
}

var (
DefaultOptions = Options{
DefaultConfig = config{
Timeout: DefaultTimeout,
UserAgent: fetch.DefaultUserAgent,
}
)

type Options struct {
type config struct {
UserAgent string
Timeout time.Duration
Client *http.Client
Expand All @@ -36,21 +65,31 @@ type FeedFetcher struct {
timeout time.Duration
}

func NewFeedFetcher(options Options) *FeedFetcher {
parser := gofeed.NewParser()
if options.UserAgent != "" {
parser.UserAgent = options.UserAgent
func MustFeedFetcher(options ...option) *FeedFetcher {
f, err := NewFeedFetcher(options...)
if err != nil {
panic(err)
}
if options.Client != nil {
parser.Client = options.Client
return f
}

func NewFeedFetcher(options ...option) (*FeedFetcher, error) {
config := DefaultConfig
for _, opt := range options {
if err := opt(&config); err != nil {
return nil, err
}
}
if options.Timeout == 0 {
options.Timeout = DefaultTimeout
parser := gofeed.NewParser()
parser.UserAgent = config.UserAgent

if config.Client != nil {
parser.Client = config.Client
}
return &FeedFetcher{
parser: parser,
timeout: options.Timeout,
}
timeout: config.Timeout,
}, nil
}

func (f *FeedFetcher) Fetch(url *nurl.URL) (*resource.Feed, error) {
Expand Down
115 changes: 108 additions & 7 deletions fetch/feed/feed_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@ func TestFetchCancelsOnTimeout(t *testing.T) {
}))
defer ts.Close()
client := ts.Client()
options := DefaultOptions
options.Timeout = timeout
options.Client = client
fetcher := NewFeedFetcher(options)
fetcher := MustFeedFetcher(
WithTimeout(timeout),
WithClient(client),
)
url, _ := nurl.Parse(ts.URL)
_, err := fetcher.Fetch(url)
if err == nil {
Expand All @@ -59,9 +59,9 @@ func TestFetchReturnsRequestedURL(t *testing.T) {
}))
defer ts.Close()
client := ts.Client()
options := DefaultOptions
options.Client = client
fetcher := NewFeedFetcher(options)
fetcher := MustFeedFetcher(
WithClient(client),
)
url, _ := nurl.Parse(ts.URL)
feed, err := fetcher.Fetch(url)
if err != nil {
Expand All @@ -71,3 +71,104 @@ func TestFetchReturnsRequestedURL(t *testing.T) {
t.Errorf("Expected URL %s, got %s", url, feed.RequestedURL)
}
}

// TestWithTimeout verifies that the WithTimeout option accepts positive
// durations and rejects zero and negative ones.
func TestWithTimeout(t *testing.T) {
	tests := []struct {
		name      string
		timeout   time.Duration
		expectErr bool
	}{
		{
			name:      "valid",
			timeout:   50 * time.Millisecond,
			expectErr: false,
		},
		{
			name:      "negative",
			timeout:   -1 * time.Millisecond,
			expectErr: true,
		},
		{
			name:      "zero",
			timeout:   0,
			expectErr: true,
		},
	}

	for _, tt := range tests {
		// Run each case as a named subtest; the original declared names
		// but never used them, so failures were hard to attribute.
		t.Run(tt.name, func(t *testing.T) {
			// Only the option's validation result matters here, so a
			// throwaway config is sufficient.
			err := WithTimeout(tt.timeout)(&config{})
			if tt.expectErr && err == nil {
				t.Errorf("Expected error for %s, got nil", tt.timeout)
			} else if !tt.expectErr && err != nil {
				t.Errorf("Unexpected error for %s: %s", tt.timeout, err)
			}
		})
	}
}

// TestWithUserAgentOption verifies that the WithUserAgent option accepts
// non-empty strings and rejects the empty string.
func TestWithUserAgentOption(t *testing.T) {
	tests := []struct {
		name      string
		ua        string
		expectErr bool
	}{
		{
			name:      "valid",
			ua:        "test",
			expectErr: false,
		},
		{
			name:      "empty",
			ua:        "",
			expectErr: true,
		},
	}

	for _, tt := range tests {
		// Use named subtests so each case reports independently instead
		// of relying on the [%s] prefix in the message.
		t.Run(tt.name, func(t *testing.T) {
			// Apply the option to a throwaway config; only the error
			// result is under test.
			err := WithUserAgent(tt.ua)(&config{})
			if tt.expectErr && err == nil {
				t.Errorf("[%s] Expected error for %s, got nil", tt.name, tt.ua)
			} else if !tt.expectErr && err != nil {
				t.Errorf("[%s] Unexpected error for %s: %s", tt.name, tt.ua, err)
			}
		})
	}
}

// TestUserAgent verifies end-to-end that the fetcher sends the default
// User-Agent when no option is given, and the custom value when
// WithUserAgent is used.
func TestUserAgent(t *testing.T) {
	tests := []struct {
		name     string
		option   option
		expected string
	}{
		{
			name:     "default",
			option:   nil,
			expected: fetch.DefaultUserAgent,
		},
		{
			name:     "custom",
			option:   WithUserAgent("test/1.0"),
			expected: "test/1.0",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				if r.UserAgent() != tt.expected {
					t.Errorf("[%s] Expected %s, got %s", tt.name, tt.expected, r.UserAgent())
				}
				// BUG FIX: headers must be set before WriteHeader; the
				// original set Content-Type afterwards, so it was
				// silently dropped from the response.
				w.Header().Set("Content-Type", "application/rss+xml")
				w.WriteHeader(http.StatusOK)
				w.Write([]byte(dummyRSS))
			}))
			t.Cleanup(func() { ts.Close() })
			client := ts.Client()
			// Always route through the test server's client; add the
			// case's option only when one is provided.
			options := []option{WithClient(client)}
			if tt.option != nil {
				options = append(options, tt.option)
			}
			fetcher := MustFeedFetcher(options...)
			url, _ := nurl.Parse(ts.URL)
			if _, err := fetcher.Fetch(url); err != nil {
				t.Errorf("[%s] Unexpected error for %s: %s", tt.name, url, err)
			}
		})
	}
}
2 changes: 1 addition & 1 deletion internal/server/api/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ func NewAPIServer(ctx context.Context, opts ...option) (*Server, error) {
return nil, errors.New("no URL fetcher provided")
}
if ss.feedFetcher == nil {
ss.feedFetcher = feed.NewFeedFetcher(feed.DefaultOptions)
ss.feedFetcher = feed.MustFeedFetcher()
}
return ss, nil
}
Expand Down
2 changes: 1 addition & 1 deletion internal/server/version/version.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package version

const (
Commit = "42a57f1"
Commit = "2b63994"
Tag = "v0.8.6"
RepoURL = "https://github.com/efixler/scrape"
)