From 2b639946ced6324de6f518653da7863a30b7d6b4 Mon Sep 17 00:00:00 2001 From: Eric Fixler Date: Fri, 16 Aug 2024 14:12:26 +0100 Subject: [PATCH 1/2] Feed options + tests --- cmd/scrape-feed/main.go | 2 +- database/sqlite/migrations/00001_init.sql | 2 +- .../sqlite/migrations/00003_feed_refresh.sql | 29 +++++ fetch/feed/feed.go | 63 ++++++++-- fetch/feed/feed_test.go | 115 ++++++++++++++++-- internal/server/api/server.go | 2 +- internal/server/version/version.go | 2 +- 7 files changed, 192 insertions(+), 23 deletions(-) create mode 100644 database/sqlite/migrations/00003_feed_refresh.sql diff --git a/cmd/scrape-feed/main.go b/cmd/scrape-feed/main.go index 11b215f..d88335e 100644 --- a/cmd/scrape-feed/main.go +++ b/cmd/scrape-feed/main.go @@ -32,7 +32,7 @@ func main() { flags.Usage() os.Exit(1) } - feedFetcher := feed.NewFeedFetcher(feed.DefaultOptions) + feedFetcher := feed.MustFeedFetcher() resource, err := feedFetcher.Fetch(feedUrl) if err != nil { slog.Error("Error fetching", "url", feedUrl, "err", err) diff --git a/database/sqlite/migrations/00001_init.sql b/database/sqlite/migrations/00001_init.sql index 4c1bced..08d0818 100644 --- a/database/sqlite/migrations/00001_init.sql +++ b/database/sqlite/migrations/00001_init.sql @@ -20,7 +20,7 @@ CREATE TABLE IF NOT EXISTS urls ( parsed_url TEXT NOT NULL, fetch_time INTEGER DEFAULT (unixepoch() ), fetch_method INTEGER NOT NULL DEFAULT 0, - expires INTEGER DEFAULT (unixepoch() + 86400), + expires INTEGER DEFAULT (unixepoch() + (86400 * 30)), metadata TEXT, content_text TEXT ) diff --git a/database/sqlite/migrations/00003_feed_refresh.sql b/database/sqlite/migrations/00003_feed_refresh.sql new file mode 100644 index 0000000..89eed96 --- /dev/null +++ b/database/sqlite/migrations/00003_feed_refresh.sql @@ -0,0 +1,29 @@ +-- This migration adds a feed_refresh table to the database. +-- +goose Up +-- +goose StatementBegin + +CREATE TABLE IF NOT EXISTS feed_refresh ( + url TEXT PRIMARY KEY NOT NULL ON CONFLICT REPLACE COLLATE NOCASE, + last_request INTEGER NOT NULL DEFAULT (unixepoch() ), + refresh_interval INTEGER NOT NULL DEFAULT (3600 * 12), + last_refresh INTEGER NOT NULL DEFAULT 0, + idle_timeout INTEGER NOT NULL DEFAULT (86400 * 7) +) +STRICT; + +CREATE INDEX IF NOT EXISTS feed_refresh_url_index ON feed_refresh ( + url ASC +); + +CREATE INDEX IF NOT EXISTS feed_refresh_time_index ON feed_refresh ( + last_refresh ASC, + refresh_interval ASC, + url ASC +); + +-- +goose StatementEnd + +-- +goose Down +-- +goose StatementBegin +DROP TABLE IF EXISTS feed_refresh; +-- +goose StatementEnd \ No newline at end of file diff --git a/fetch/feed/feed.go b/fetch/feed/feed.go index 6061361..3c9c12a 100644 --- a/fetch/feed/feed.go +++ b/fetch/feed/feed.go @@ -18,14 +18,43 @@ const ( DefaultTimeout = 30 * time.Second ) +type option func(*Config) error + +func WithUserAgent(ua string) option { + return func(c *Config) error { + if ua == "" { + return errors.New("user agent must not be empty") + } + c.UserAgent = ua + return nil + } +} + +func WithTimeout(t time.Duration) option { + return func(c *Config) error { + if t <= 0 { + return errors.New("timeout must be positive") + } + c.Timeout = t + return nil + } +} + +func WithClient(client *http.Client) option { + return func(c *Config) error { + c.Client = client + return nil + } +} + var ( - DefaultOptions = Options{ + DefaultConfig = Config{ Timeout: DefaultTimeout, UserAgent: fetch.DefaultUserAgent, } ) -type Options struct { +type Config struct { UserAgent string Timeout time.Duration Client *http.Client @@ -36,21 +65,31 @@ type FeedFetcher struct { timeout time.Duration } -func NewFeedFetcher(options Options) *FeedFetcher { - parser := gofeed.NewParser() - if options.UserAgent != "" { - parser.UserAgent = options.UserAgent +func MustFeedFetcher(options ...option) *FeedFetcher { + f, err := NewFeedFetcher(options...) + if err != nil { + panic(err) } - if options.Client != nil { - parser.Client = options.Client + return f +} + +func NewFeedFetcher(options ...option) (*FeedFetcher, error) { + config := DefaultConfig + for _, opt := range options { + if err := opt(&config); err != nil { + return nil, err + } } - if options.Timeout == 0 { - options.Timeout = DefaultTimeout + parser := gofeed.NewParser() + parser.UserAgent = config.UserAgent + + if config.Client != nil { + parser.Client = config.Client } return &FeedFetcher{ parser: parser, - timeout: options.Timeout, - } + timeout: config.Timeout, + }, nil } func (f *FeedFetcher) Fetch(url *nurl.URL) (*resource.Feed, error) { diff --git a/fetch/feed/feed_test.go b/fetch/feed/feed_test.go index dab3fee..c5955fe 100644 --- a/fetch/feed/feed_test.go +++ b/fetch/feed/feed_test.go @@ -31,10 +31,10 @@ func TestFetchCancelsOnTimeout(t *testing.T) { })) defer ts.Close() client := ts.Client() - options := DefaultOptions - options.Timeout = timeout - options.Client = client - fetcher := NewFeedFetcher(options) + fetcher := MustFeedFetcher( + WithTimeout(timeout), + WithClient(client), + ) url, _ := nurl.Parse(ts.URL) _, err := fetcher.Fetch(url) if err == nil { @@ -59,9 +59,9 @@ func TestFetchReturnsRequestedURL(t *testing.T) { })) defer ts.Close() client := ts.Client() - options := DefaultOptions - options.Client = client - fetcher := NewFeedFetcher(options) + fetcher := MustFeedFetcher( + WithClient(client), + ) url, _ := nurl.Parse(ts.URL) feed, err := fetcher.Fetch(url) if err != nil { @@ -71,3 +71,104 @@ func TestFetchReturnsRequestedURL(t *testing.T) { t.Errorf("Expected URL %s, got %s", url, feed.RequestedURL) } } + +func TestWithTimeout(t *testing.T) { + tests := []struct { + name string + timeout time.Duration + expectErr bool + }{ + { + name: "valid", + timeout: 50 * time.Millisecond, + expectErr: false, + }, + { + name: "negative", + timeout: -1 * time.Millisecond, + expectErr: true, + }, + { + name: "zero", + timeout: 0, + expectErr: true, + }, + } + + for _, tt := range tests { + err := WithTimeout(tt.timeout)(&Config{}) + if tt.expectErr && err == nil { + t.Errorf("Expected error for %s, got nil", tt.timeout) + } else if !tt.expectErr && err != nil { + t.Errorf("Unexpected error for %s: %s", tt.timeout, err) + } + } +} + +func TestWithUserAgentOption(t *testing.T) { + tests := []struct { + name string + ua string + expectErr bool + }{ + { + name: "valid", + ua: "test", + expectErr: false, + }, + { + name: "empty", + ua: "", + expectErr: true, + }, + } + + for _, tt := range tests { + err := WithUserAgent(tt.ua)(&Config{}) + if tt.expectErr && err == nil { + t.Errorf("[%s] Expected error for %s, got nil", tt.name, tt.ua) + } else if !tt.expectErr && err != nil { + t.Errorf("[%s] Unexpected error for %s: %s", tt.name, tt.ua, err) + } + } +} + +func TestUserAgent(t *testing.T) { + tests := []struct { + name string + option option + expected string + }{ + { + name: "default", + option: nil, + expected: fetch.DefaultUserAgent, + }, + { + name: "custom", + option: WithUserAgent("test/1.0"), + expected: "test/1.0", + }, + } + for _, tt := range tests { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.UserAgent() != tt.expected { + t.Errorf("[%s] Expected %s, got %s", tt.name, tt.expected, r.UserAgent()) + } + w.WriteHeader(http.StatusOK) + w.Header().Set("Content-Type", "application/rss+xml") + w.Write([]byte(dummyRSS)) + })) + t.Cleanup(func() { ts.Close() }) + client := ts.Client() + options := []option{WithClient(client)} + if tt.option != nil { + options = append(options, tt.option) + } + fetcher := MustFeedFetcher(options...) + url, _ := nurl.Parse(ts.URL) + if _, err := fetcher.Fetch(url); err != nil { + t.Errorf("[%s] Unexpected error for %s: %s", tt.name, url, err) + } + } +} diff --git a/internal/server/api/server.go b/internal/server/api/server.go index 1a9ef20..57402a5 100644 --- a/internal/server/api/server.go +++ b/internal/server/api/server.go @@ -97,7 +97,7 @@ func NewAPIServer(ctx context.Context, opts ...option) (*Server, error) { return nil, errors.New("no URL fetcher provided") } if ss.feedFetcher == nil { - ss.feedFetcher = feed.NewFeedFetcher(feed.DefaultOptions) + ss.feedFetcher = feed.MustFeedFetcher() } return ss, nil } diff --git a/internal/server/version/version.go b/internal/server/version/version.go index 710e38c..48b809a 100644 --- a/internal/server/version/version.go +++ b/internal/server/version/version.go @@ -1,7 +1,7 @@ package version const ( - Commit = "42a57f1" + Commit = "9bb2a85" Tag = "v0.8.6" RepoURL = "https://github.com/efixler/scrape" ) From 7b8ee47084eb89365c457e611774d32613678118 Mon Sep 17 00:00:00 2001 From: Eric Fixler Date: Fri, 16 Aug 2024 17:25:07 +0100 Subject: [PATCH 2/2] Make feed config type private --- fetch/feed/feed.go | 12 ++++++------ fetch/feed/feed_test.go | 4 ++-- internal/server/version/version.go | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fetch/feed/feed.go b/fetch/feed/feed.go index 3c9c12a..500bfb8 100644 --- a/fetch/feed/feed.go +++ b/fetch/feed/feed.go @@ -18,10 +18,10 @@ const ( DefaultTimeout = 30 * time.Second ) -type option func(*Config) error +type option func(*config) error func WithUserAgent(ua string) option { - return func(c *Config) error { + return func(c *config) error { if ua == "" { return errors.New("user agent must not be empty") } @@ -31,7 +31,7 @@ func WithUserAgent(ua string) option { } func WithTimeout(t time.Duration) option { - return func(c *Config) error { + return func(c *config) error { if t <= 0 { return errors.New("timeout must be positive") } @@ -41,20 +41,20 @@ func WithTimeout(t time.Duration) option { } func WithClient(client *http.Client) option { - return func(c *Config) error { + return func(c *config) error { c.Client = client return nil } } var ( - DefaultConfig = Config{ + DefaultConfig = config{ Timeout: DefaultTimeout, UserAgent: fetch.DefaultUserAgent, } ) -type Config struct { +type config struct { UserAgent string Timeout time.Duration Client *http.Client diff --git a/fetch/feed/feed_test.go b/fetch/feed/feed_test.go index c5955fe..7c2f104 100644 --- a/fetch/feed/feed_test.go +++ b/fetch/feed/feed_test.go @@ -96,7 +96,7 @@ func TestWithTimeout(t *testing.T) { } for _, tt := range tests { - err := WithTimeout(tt.timeout)(&Config{}) + err := WithTimeout(tt.timeout)(&config{}) if tt.expectErr && err == nil { t.Errorf("Expected error for %s, got nil", tt.timeout) } else if !tt.expectErr && err != nil { @@ -124,7 +124,7 @@ func TestWithUserAgentOption(t *testing.T) { } for _, tt := range tests { - err := WithUserAgent(tt.ua)(&Config{}) + err := WithUserAgent(tt.ua)(&config{}) if tt.expectErr && err == nil { t.Errorf("[%s] Expected error for %s, got nil", tt.name, tt.ua) } else if !tt.expectErr && err != nil { diff --git a/internal/server/version/version.go b/internal/server/version/version.go index 48b809a..dbc4384 100644 --- a/internal/server/version/version.go +++ b/internal/server/version/version.go @@ -1,7 +1,7 @@ package version const ( - Commit = "9bb2a85" + Commit = "2b63994" Tag = "v0.8.6" RepoURL = "https://github.com/efixler/scrape" )