From a91ba1cff7900af32b2beb4c3c1b5610ef5fe73c Mon Sep 17 00:00:00 2001 From: Geoffrey Wossum Date: Thu, 17 Oct 2024 18:03:08 -0500 Subject: [PATCH 1/4] feat: add `--pid-file` option to write PID files Add `--pid-file` option to write PID files on startup. The PID filename is specified by the argument after `--pid-file`. Example: `influxd --pid-file /var/lib/influxd/influxd.pid` PID files are automatically removed when the influxd process is shutdown. Closes: 25473 --- cmd/influxd/launcher/cmd.go | 10 +++++++ cmd/influxd/launcher/launcher.go | 39 +++++++++++++++++++++++++++ cmd/influxd/launcher/launcher_test.go | 22 +++++++++++++++ 3 files changed, 71 insertions(+) diff --git a/cmd/influxd/launcher/cmd.go b/cmd/influxd/launcher/cmd.go index f1f123e5c05..a4162a198fa 100644 --- a/cmd/influxd/launcher/cmd.go +++ b/cmd/influxd/launcher/cmd.go @@ -144,6 +144,8 @@ type InfluxdOpts struct { TracingType string ReportingDisabled bool + PIDFile string + AssetsPath string BoltPath string SqLitePath string @@ -213,6 +215,8 @@ func NewOpts(viper *viper.Viper) *InfluxdOpts { FluxLogEnabled: false, ReportingDisabled: false, + PIDFile: "", + BoltPath: filepath.Join(dir, bolt.DefaultFilename), SqLitePath: filepath.Join(dir, sqlite.DefaultFilename), EnginePath: filepath.Join(dir, "engine"), @@ -325,6 +329,12 @@ func (o *InfluxdOpts) BindCliOpts() []cli.Opt { Default: o.ReportingDisabled, Desc: "disable sending telemetry data to https://telemetry.influxdata.com every 8 hours", }, + { + DestP: &o.PIDFile, + Flag: "pid-file", + Default: o.PIDFile, + Desc: "write process ID to a file", + }, { DestP: &o.SessionLength, Flag: "session-length", diff --git a/cmd/influxd/launcher/launcher.go b/cmd/influxd/launcher/launcher.go index 215788f5d7b..f17806eefdd 100644 --- a/cmd/influxd/launcher/launcher.go +++ b/cmd/influxd/launcher/launcher.go @@ -9,6 +9,7 @@ import ( nethttp "net/http" "os" "path/filepath" + "strconv" "strings" "sync" "time" @@ -248,6 +249,10 @@ func (m *Launcher) run(ctx context.Context, opts *InfluxdOpts) (err error) { } } + if err := m.writePIDFile(opts.PIDFile); err != nil { + return fmt.Errorf("error writing PIDFile %q: %w", opts.PIDFile, err) + } + m.reg = prom.NewRegistry(m.log.With(zap.String("service", "prom_registry"))) m.reg.MustRegister(collectors.NewGoCollector()) @@ -970,6 +975,40 @@ func (m *Launcher) initTracing(opts *InfluxdOpts) { } } +// writePIDFile will write the process ID to pidFile and register a cleanup function to delete it during +// shutdown. If pidFile is empty, then no PID file is written and no cleanup function is registered. +func (m *Launcher) writePIDFile(pidFile string) error { + if pidFile == "" { + return nil + } + + // Create directory to PIDfile if needed. + if err := os.MkdirAll(filepath.Dir(pidFile), 0777); err != nil { + return fmt.Errorf("mkdir: %w", err) + } + + // Write PID to file + pidStr := strconv.Itoa(os.Getpid()) + if writeErr := os.WriteFile(pidFile, []byte(pidStr), 0666); writeErr != nil { + // Let's make sure we don't leave a PID file behind on error. + removeErr := os.Remove(pidFile) + return fmt.Errorf("write file: %w; remove file: %w", writeErr, removeErr) + } + + // Add a cleanup function. + m.closers = append(m.closers, labeledCloser{ + label: "pidfile", + closer: func(context.Context) error { + if err := os.Remove(pidFile); err != nil { + return fmt.Errorf("removing PID file %q: %w", pidFile, err) + } + return nil + }, + }) + + return nil +} + // openMetaStores opens the embedded DBs used to store metadata about influxd resources, migrating them to // the latest schema expected by the server. // On success, a unique ID is returned to be used as an identifier for the influxd instance in telemetry. diff --git a/cmd/influxd/launcher/launcher_test.go b/cmd/influxd/launcher/launcher_test.go index 1db0c0972d4..67c56f870e3 100644 --- a/cmd/influxd/launcher/launcher_test.go +++ b/cmd/influxd/launcher/launcher_test.go @@ -5,6 +5,9 @@ import ( "encoding/json" "io" nethttp "net/http" + "os" + "path/filepath" + "strconv" "testing" "time" @@ -14,6 +17,7 @@ import ( "github.com/influxdata/influxdb/v2/http" "github.com/influxdata/influxdb/v2/tenant" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) // Default context. @@ -164,3 +168,21 @@ func TestLauncher_PingHeaders(t *testing.T) { assert.Equal(t, []string{"OSS"}, resp.Header.Values("X-Influxdb-Build")) assert.Equal(t, []string{"dev"}, resp.Header.Values("X-Influxdb-Version")) } + +func TestLauncher_PIDFile(t *testing.T) { + pidDir := t.TempDir() + pidFilename := filepath.Join(pidDir, "influxd.pid") + + l := launcher.RunAndSetupNewLauncherOrFail(ctx, t, func(o *launcher.InfluxdOpts) { + o.PIDFile = pidFilename + }) + defer func() { + l.ShutdownOrFail(t, ctx) + require.NoFileExists(t, pidFilename) + }() + + require.FileExists(t, pidFilename) + pidBytes, err := os.ReadFile(pidFilename) + require.NoError(t, err) + require.Equal(t, strconv.Itoa(os.Getpid()), string(pidBytes)) +} From fe8e321c118d8911a5d963f12e093e77bb59f862 Mon Sep 17 00:00:00 2001 From: Geoffrey Wossum Date: Tue, 22 Oct 2024 15:55:10 -0500 Subject: [PATCH 2/4] feat: use PID file as lock and add `--overwrite-pid-file` Change default behavior to abort with an error if the PID file is requested but it already exists. Add `-ooverwrite-pid-file` flag to change behavior to attempt to overwrite PID file if the file exists instead of aborting. --- cmd/influxd/launcher/cmd.go | 12 ++- cmd/influxd/launcher/launcher.go | 66 ++++++++++++--- cmd/influxd/launcher/launcher_helpers.go | 7 +- cmd/influxd/launcher/launcher_test.go | 102 +++++++++++++++++++++++ 4 files changed, 171 insertions(+), 16 deletions(-) diff --git a/cmd/influxd/launcher/cmd.go b/cmd/influxd/launcher/cmd.go index a4162a198fa..5fd1ffbb6f1 100644 --- a/cmd/influxd/launcher/cmd.go +++ b/cmd/influxd/launcher/cmd.go @@ -144,7 +144,8 @@ type InfluxdOpts struct { TracingType string ReportingDisabled bool - PIDFile string + PIDFile string + OverwritePIDFile bool AssetsPath string BoltPath string @@ -215,7 +216,8 @@ func NewOpts(viper *viper.Viper) *InfluxdOpts { FluxLogEnabled: false, ReportingDisabled: false, - PIDFile: "", + PIDFile: "", + OverwritePIDFile: false, BoltPath: filepath.Join(dir, bolt.DefaultFilename), SqLitePath: filepath.Join(dir, sqlite.DefaultFilename), @@ -335,6 +337,12 @@ func (o *InfluxdOpts) BindCliOpts() []cli.Opt { Default: o.PIDFile, Desc: "write process ID to a file", }, + { + DestP: &o.OverwritePIDFile, + Flag: "overwrite-pid-file", + Default: o.OverwritePIDFile, + Desc: "overwrite PID file if it already exists instead of exiting", + }, { DestP: &o.SessionLength, Flag: "session-length", diff --git a/cmd/influxd/launcher/launcher.go b/cmd/influxd/launcher/launcher.go index f17806eefdd..2edc391d04e 100644 --- a/cmd/influxd/launcher/launcher.go +++ b/cmd/influxd/launcher/launcher.go @@ -5,6 +5,7 @@ import ( "crypto/tls" "errors" "fmt" + "io/fs" "net" nethttp "net/http" "os" @@ -110,6 +111,11 @@ const ( JaegerTracing = "jaeger" ) +var ( + // ErrPIDFileExists indicates that a PID file already exists. + ErrPIDFileExists = errors.New("PID file exists (possible unclean shutdown or another instance already running)") +) + type labeledCloser struct { label string closer func(context.Context) error @@ -249,7 +255,7 @@ func (m *Launcher) run(ctx context.Context, opts *InfluxdOpts) (err error) { } } - if err := m.writePIDFile(opts.PIDFile); err != nil { + if err := m.writePIDFile(opts.PIDFile, opts.OverwritePIDFile); err != nil { return fmt.Errorf("error writing PIDFile %q: %w", opts.PIDFile, err) } @@ -975,32 +981,66 @@ func (m *Launcher) initTracing(opts *InfluxdOpts) { } } -// writePIDFile will write the process ID to pidFile and register a cleanup function to delete it during -// shutdown. If pidFile is empty, then no PID file is written and no cleanup function is registered. -func (m *Launcher) writePIDFile(pidFile string) error { - if pidFile == "" { +// writePIDFile will write the process ID to pidFilename and register a cleanup function to delete it during +// shutdown. If pidFilename is empty, then no PID file is written and no cleanup function is registered. +// If pidFilename already exists and overwrite is false, then pidFilename is not overwritten and a +// ErrPIDFileExists error is returned. If pidFilename already exists and overwrite is true, then pidFilename +// will be overwritten but a warning will be logged. +func (m *Launcher) writePIDFile(pidFilename string, overwrite bool) error { + if pidFilename == "" { return nil } // Create directory to PIDfile if needed. - if err := os.MkdirAll(filepath.Dir(pidFile), 0777); err != nil { + if err := os.MkdirAll(filepath.Dir(pidFilename), 0777); err != nil { return fmt.Errorf("mkdir: %w", err) } - // Write PID to file - pidStr := strconv.Itoa(os.Getpid()) - if writeErr := os.WriteFile(pidFile, []byte(pidStr), 0666); writeErr != nil { + // Write PID to file, but don't clobber an existing PID file. + pidBytes := []byte(strconv.Itoa(os.Getpid())) + pidMode := fs.FileMode(0666) + openFlags := os.O_WRONLY | os.O_CREATE | os.O_TRUNC + pidFile, err := os.OpenFile(pidFilename, openFlags|os.O_EXCL, pidMode) + if err != nil { + if !errors.Is(err, os.ErrExist) { + return fmt.Errorf("open file: %w", err) + } + if !overwrite { + return ErrPIDFileExists + } else { + m.log.Warn("PID file already exists, attempting to overwrite", zap.String("pidFile", pidFilename)) + pidFile, err = os.OpenFile(pidFilename, openFlags, pidMode) + if err != nil { + return fmt.Errorf("overwrite file: %w", err) + } + } + } + _, writeErr := pidFile.Write(pidBytes) // Contract says Write must return an error if count < len(pidBytes). + closeErr := pidFile.Close() // always close the file + if writeErr != nil || closeErr != nil { + var errs []error + if writeErr != nil { + errs = append(errs, fmt.Errorf("write file: %w", writeErr)) + } + if closeErr != nil { + errs = append(errs, fmt.Errorf("close file: %w", closeErr)) + } + // Let's make sure we don't leave a PID file behind on error. - removeErr := os.Remove(pidFile) - return fmt.Errorf("write file: %w; remove file: %w", writeErr, removeErr) + removeErr := os.Remove(pidFilename) + if removeErr != nil { + errs = append(errs, fmt.Errorf("remove file: %w", removeErr)) + } + + return errors.Join(errs...) } // Add a cleanup function. m.closers = append(m.closers, labeledCloser{ label: "pidfile", closer: func(context.Context) error { - if err := os.Remove(pidFile); err != nil { - return fmt.Errorf("removing PID file %q: %w", pidFile, err) + if err := os.Remove(pidFilename); err != nil { + return fmt.Errorf("removing PID file %q: %w", pidFilename, err) } return nil }, diff --git a/cmd/influxd/launcher/launcher_helpers.go b/cmd/influxd/launcher/launcher_helpers.go index 3cb48ee8306..02d2f036669 100644 --- a/cmd/influxd/launcher/launcher_helpers.go +++ b/cmd/influxd/launcher/launcher_helpers.go @@ -55,6 +55,8 @@ type TestLauncher struct { Bucket *influxdb.Bucket Auth *influxdb.Authorization + Logger *zap.Logger + httpClient *httpc.Client apiClient *api.APIClient @@ -146,7 +148,10 @@ func (tl *TestLauncher) Run(tb zaptest.TestingT, ctx context.Context, setters .. } // Set up top-level logger to write into the test-case. - tl.Launcher.log = zaptest.NewLogger(tb, zaptest.Level(opts.LogLevel)).With(zap.String("test_name", tb.Name())) + if tl.Logger == nil { + tl.Logger = zaptest.NewLogger(tb, zaptest.Level(opts.LogLevel)).With(zap.String("test_name", tb.Name())) + } + tl.Launcher.log = tl.Logger return tl.Launcher.run(ctx, opts) } diff --git a/cmd/influxd/launcher/launcher_test.go b/cmd/influxd/launcher/launcher_test.go index 67c56f870e3..679eef22c29 100644 --- a/cmd/influxd/launcher/launcher_test.go +++ b/cmd/influxd/launcher/launcher_test.go @@ -3,6 +3,7 @@ package launcher_test import ( "context" "encoding/json" + "fmt" "io" nethttp "net/http" "os" @@ -18,6 +19,9 @@ import ( "github.com/influxdata/influxdb/v2/tenant" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "go.uber.org/zap" + "go.uber.org/zap/zapcore" + "go.uber.org/zap/zaptest/observer" ) // Default context. @@ -186,3 +190,101 @@ func TestLauncher_PIDFile(t *testing.T) { require.NoError(t, err) require.Equal(t, strconv.Itoa(os.Getpid()), string(pidBytes)) } + +func TestLauncher_PIDFile_Locked(t *testing.T) { + pidDir := t.TempDir() + pidFilename := filepath.Join(pidDir, "influxd.pid") + lockContents := []byte("foobar") // something wouldn't appear in normal lock file + + // Write PID file to lock out the launcher. + require.NoError(t, os.WriteFile(pidFilename, lockContents, 0666)) + require.FileExists(t, pidFilename) + origSt, err := os.Stat(pidFilename) + require.NoError(t, err) + + // Make sure we get an error about the PID file from the launcher + l := launcher.NewTestLauncher() + err = l.Run(t, ctx, func(o *launcher.InfluxdOpts) { + o.PIDFile = pidFilename + }) + defer func() { + l.ShutdownOrFail(t, ctx) + + require.FileExists(t, pidFilename) + contents, err := os.ReadFile(pidFilename) + require.NoError(t, err) + require.Equal(t, lockContents, contents) + curSt, err := os.Stat(pidFilename) + require.NoError(t, err) + require.Equal(t, origSt, curSt) + }() + + require.ErrorIs(t, err, launcher.ErrPIDFileExists) + require.ErrorContains(t, err, fmt.Sprintf("error writing PIDFile %q: PID file exists (possible unclean shutdown or another instance already running)", pidFilename)) +} + +func TestLauncher_PIDFile_Overwrite(t *testing.T) { + pidDir := t.TempDir() + pidFilename := filepath.Join(pidDir, "influxd.pid") + lockContents := []byte("foobar") // something wouldn't appear in normal lock file + + // Write PID file to lock out the launcher (or not in this case). + require.NoError(t, os.WriteFile(pidFilename, lockContents, 0666)) + require.FileExists(t, pidFilename) + + // Make sure we get an error about the PID file from the launcher. + l := launcher.NewTestLauncher() + loggerCore, ol := observer.New(zap.WarnLevel) + l.Logger = zap.New(loggerCore) + err := l.Run(t, ctx, func(o *launcher.InfluxdOpts) { + o.PIDFile = pidFilename + o.OverwritePIDFile = true + }) + defer func() { + l.ShutdownOrFail(t, ctx) + + require.NoFileExists(t, pidFilename) + }() + require.NoError(t, err) + + expLogs := []observer.LoggedEntry{ + { + Entry: zapcore.Entry{Level: zap.WarnLevel, Message: "PID file already exists, attempting to overwrite"}, + Context: []zapcore.Field{zap.String("pidFile", pidFilename)}, + }, + } + require.Equal(t, expLogs, ol.AllUntimed()) + require.FileExists(t, pidFilename) + pidBytes, err := os.ReadFile(pidFilename) + require.NoError(t, err) + require.Equal(t, strconv.Itoa(os.Getpid()), string(pidBytes)) +} + +func TestLauncher_PIDFile_OverwriteFail(t *testing.T) { + pidDir := t.TempDir() + pidFilename := filepath.Join(pidDir, "influxd.pid") + lockContents := []byte("foobar") // something wouldn't appear in normal lock file + + // Write PID file to lock out the launcher. + require.NoError(t, os.WriteFile(pidFilename, lockContents, 0666)) + require.FileExists(t, pidFilename) + require.NoError(t, os.Chmod(pidFilename, 0000)) + + // Make sure we get an error about the PID file from the launcher + l := launcher.NewTestLauncher() + err := l.Run(t, ctx, func(o *launcher.InfluxdOpts) { + o.PIDFile = pidFilename + o.OverwritePIDFile = true + }) + defer func() { + l.ShutdownOrFail(t, ctx) + + require.FileExists(t, pidFilename) + require.NoError(t, os.Chmod(pidFilename, 0644)) + pidBytes, err := os.ReadFile(pidFilename) + require.NoError(t, err) + require.Equal(t, lockContents, pidBytes) + }() + + require.ErrorContains(t, err, fmt.Sprintf("error writing PIDFile %[1]q: overwrite file: open %[1]s: permission denied", pidFilename)) +} From 6f679da00916b432e0bb8497194150e012bccbae Mon Sep 17 00:00:00 2001 From: Geoffrey Wossum Date: Tue, 22 Oct 2024 17:14:59 -0500 Subject: [PATCH 3/4] chore: more portable error checking --- cmd/influxd/launcher/launcher.go | 2 +- cmd/influxd/launcher/launcher_test.go | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cmd/influxd/launcher/launcher.go b/cmd/influxd/launcher/launcher.go index 2edc391d04e..115a1495b23 100644 --- a/cmd/influxd/launcher/launcher.go +++ b/cmd/influxd/launcher/launcher.go @@ -1002,7 +1002,7 @@ func (m *Launcher) writePIDFile(pidFilename string, overwrite bool) error { openFlags := os.O_WRONLY | os.O_CREATE | os.O_TRUNC pidFile, err := os.OpenFile(pidFilename, openFlags|os.O_EXCL, pidMode) if err != nil { - if !errors.Is(err, os.ErrExist) { + if !errors.Is(err, fs.ErrExist) { return fmt.Errorf("open file: %w", err) } if !overwrite { diff --git a/cmd/influxd/launcher/launcher_test.go b/cmd/influxd/launcher/launcher_test.go index 679eef22c29..0c1ec2e9f14 100644 --- a/cmd/influxd/launcher/launcher_test.go +++ b/cmd/influxd/launcher/launcher_test.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "io" + "io/fs" nethttp "net/http" "os" "path/filepath" @@ -286,5 +287,6 @@ func TestLauncher_PIDFile_OverwriteFail(t *testing.T) { require.Equal(t, lockContents, pidBytes) }() - require.ErrorContains(t, err, fmt.Sprintf("error writing PIDFile %[1]q: overwrite file: open %[1]s: permission denied", pidFilename)) + require.ErrorContains(t, err, fmt.Sprintf("error writing PIDFile %[1]q: overwrite file: open %[1]s:", pidFilename)) + require.ErrorIs(t, err, fs.ErrPermission) } From be8412e03bc5ae9d8dd1d06fd88d1c67015f79a7 Mon Sep 17 00:00:00 2001 From: Geoffrey Wossum Date: Tue, 22 Oct 2024 17:44:06 -0500 Subject: [PATCH 4/4] chore: fix test for darwin --- cmd/influxd/launcher/launcher_test.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cmd/influxd/launcher/launcher_test.go b/cmd/influxd/launcher/launcher_test.go index 0c1ec2e9f14..1924d715f36 100644 --- a/cmd/influxd/launcher/launcher_test.go +++ b/cmd/influxd/launcher/launcher_test.go @@ -9,6 +9,7 @@ import ( nethttp "net/http" "os" "path/filepath" + "runtime" "strconv" "testing" "time" @@ -217,7 +218,12 @@ func TestLauncher_PIDFile_Locked(t *testing.T) { require.Equal(t, lockContents, contents) curSt, err := os.Stat(pidFilename) require.NoError(t, err) - require.Equal(t, origSt, curSt) + + // CircleCI test runners for darwin don't have `noatime` / `relatime`, so + // the atime will differ, which is inside the system specific data. + if runtime.GOOS != "darwin" { + require.Equal(t, origSt, curSt) + } }() require.ErrorIs(t, err, launcher.ErrPIDFileExists)