From 4b2b485366b51bf25c67e19740773adfc4778462 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Caner=20=C3=87=C4=B1dam?= Date: Sun, 20 Aug 2023 00:53:47 +0200 Subject: [PATCH 1/8] implement image cleanup and separate it from bot teardown --- Makefile | 1 + clients/docker/client.go | 12 +++ clients/interfaces.go | 1 + clients/mocks/mock_clients.go | 15 +++ config/agents.go | 5 +- services/components/components.go | 11 ++- services/components/containers/bot_client.go | 19 ++-- .../components/containers/bot_client_test.go | 3 +- .../components/containers/image_cleanup.go | 99 +++++++++++++++++++ .../containers/image_cleanup_test.go | 94 ++++++++++++++++++ .../containers/mocks/mock_bot_client.go | 8 +- .../containers/mocks/mock_image_cleanup.go | 49 +++++++++ services/components/lifecycle/bot_manager.go | 6 +- .../components/lifecycle/bot_manager_test.go | 10 +- .../components/lifecycle/lifecycle_test.go | 2 +- services/supervisor/bots.go | 3 + services/supervisor/bots_test.go | 8 +- 17 files changed, 313 insertions(+), 33 deletions(-) create mode 100644 services/components/containers/image_cleanup.go create mode 100644 services/components/containers/image_cleanup_test.go create mode 100644 services/components/containers/mocks/mock_image_cleanup.go diff --git a/Makefile b/Makefile index e38e5db0..52a8785c 100644 --- a/Makefile +++ b/Makefile @@ -27,6 +27,7 @@ mocks: mockgen -source services/components/lifecycle/bot_manager.go -destination services/components/lifecycle/mocks/mock_bot_manager.go mockgen -source services/components/lifecycle/bot_monitor.go -destination services/components/lifecycle/mocks/mock_bot_monitor.go mockgen -source services/components/containers/bot_client.go -destination services/components/containers/mocks/mock_bot_client.go + mockgen -source services/components/containers/image_cleanup.go -destination services/components/containers/mocks/mock_image_cleanup.go mockgen -source services/components/botio/sender.go -destination services/components/botio/mocks/mock_sender.go mockgen -source services/components/botio/bot_client.go -destination services/components/botio/mocks/mock_bot_client.go mockgen -source services/components/botio/bot_client_factory.go -destination services/components/botio/mocks/mock_bot_client_factory.go diff --git a/clients/docker/client.go b/clients/docker/client.go index ebdc1ef5..35742301 100644 --- a/clients/docker/client.go +++ b/clients/docker/client.go @@ -786,6 +786,18 @@ func (d *dockerClient) EnsureLocalImages(ctx context.Context, timeoutPerPull tim return } +// ListImages lists all images. +func (d *dockerClient) ListImages(ctx context.Context) (imgs []string, err error) { + imgSummaries, err := d.cli.ImageList(ctx, types.ImageListOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to list locally available images: %v", err) + } + for _, imgSummary := range imgSummaries { + imgs = append(imgs, imgSummary.RepoTags...) + } + return +} + // GetContainerLogs gets the container logs. func (d *dockerClient) GetContainerLogs(ctx context.Context, containerID, tail string, truncate int) (string, error) { r, err := d.cli.ContainerLogs(ctx, containerID, types.ContainerLogsOptions{ diff --git a/clients/interfaces.go b/clients/interfaces.go index bf8fc11a..6cf03ac9 100644 --- a/clients/interfaces.go +++ b/clients/interfaces.go @@ -43,6 +43,7 @@ type DockerClient interface { HasLocalImage(ctx context.Context, ref string) (bool, error) EnsureLocalImage(ctx context.Context, name, ref string) error EnsureLocalImages(ctx context.Context, timeoutPerPull time.Duration, imagePulls []docker.ImagePull) []error + ListImages(ctx context.Context) ([]string, error) GetContainerLogs(ctx context.Context, containerID, tail string, truncate int) (string, error) GetContainerFromRemoteAddr(ctx context.Context, hostPort string) (*types.Container, error) SetImagePullCooldown(threshold int, cooldownDuration time.Duration) diff --git a/clients/mocks/mock_clients.go b/clients/mocks/mock_clients.go index 2dc93af8..225d3c8b 100644 --- a/clients/mocks/mock_clients.go +++ b/clients/mocks/mock_clients.go @@ -275,6 +275,21 @@ func (mr *MockDockerClientMockRecorder) InterruptContainer(ctx, id interface{}) return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InterruptContainer", reflect.TypeOf((*MockDockerClient)(nil).InterruptContainer), ctx, id) } +// ListImages mocks base method. +func (m *MockDockerClient) ListImages(ctx context.Context) ([]string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ListImages", ctx) + ret0, _ := ret[0].([]string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// ListImages indicates an expected call of ListImages. +func (mr *MockDockerClientMockRecorder) ListImages(ctx interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListImages", reflect.TypeOf((*MockDockerClient)(nil).ListImages), ctx) +} + // Nuke mocks base method. func (m *MockDockerClient) Nuke(ctx context.Context) error { m.ctrl.T.Helper() diff --git a/config/agents.go b/config/agents.go index 3cfe7250..af6e34c4 100644 --- a/config/agents.go +++ b/config/agents.go @@ -10,8 +10,9 @@ import ( ) const ( - AgentGrpcPort = "50051" - HeartbeatBotID = "0x37ce3df7facea4bd95619655347ffd66f50909d100fa74ae042f122a8b024c29" + AgentGrpcPort = "50051" + HeartbeatBotID = "0x37ce3df7facea4bd95619655347ffd66f50909d100fa74ae042f122a8b024c29" + HeartbeatBotImage = "bafybeicrbhj7ekrl6njozgpncl3jz4y64imxuilomkmaolztbyat5dtxri" ) type AgentConfig struct { diff --git a/services/components/components.go b/services/components/components.go index 9f9e64b1..018df3a8 100644 --- a/services/components/components.go +++ b/services/components/components.go @@ -76,8 +76,9 @@ type BotLifecycleConfig struct { // BotLifecycle contains the bot lifecycle components. type BotLifecycle struct { - BotManager lifecycle.BotLifecycleManager - BotClient containers.BotClient + BotManager lifecycle.BotLifecycleManager + BotClient containers.BotClient + ImageCleanup containers.ImageCleanup } // GetBotLifecycleComponents returns the bot lifecycle management components. @@ -118,9 +119,11 @@ func GetBotLifecycleComponents(ctx context.Context, botLifeConfig BotLifecycleCo botLifeConfig.BotRegistry, botClient, lifecycleMediator, lifecycleMetrics, botMonitor, ) + imageCleanup := containers.NewImageCleanup(dockerClient, config.HeartbeatBotImage) return BotLifecycle{ - BotManager: botManager, - BotClient: botClient, + BotManager: botManager, + BotClient: botClient, + ImageCleanup: imageCleanup, }, nil } diff --git a/services/components/containers/bot_client.go b/services/components/containers/bot_client.go index 0ef47af3..c46b0a07 100644 --- a/services/components/containers/bot_client.go +++ b/services/components/containers/bot_client.go @@ -27,7 +27,7 @@ const ( type BotClient interface { EnsureBotImages(ctx context.Context, botConfigs []config.AgentConfig) []error LaunchBot(ctx context.Context, botConfig config.AgentConfig) error - TearDownBot(ctx context.Context, containerName string, removeImage bool) error + TearDownBot(ctx context.Context, containerName string) error StopBot(ctx context.Context, botConfig config.AgentConfig) error LoadBotContainers(ctx context.Context) ([]types.Container, error) StartWaitBotContainer(ctx context.Context, containerID string) error @@ -139,8 +139,8 @@ func getServiceContainerNames() []string { } } -// TearDownBot tears down a bot by shutting down the docker container and removing it. -func (bc *botClient) TearDownBot(ctx context.Context, containerName string, removeImage bool) error { +// TearDownBot tears down a bot removing the docker container and network. +func (bc *botClient) TearDownBot(ctx context.Context, containerName string) error { container, err := bc.client.GetContainerByName(ctx, containerName) if err != nil { return fmt.Errorf("failed to get the bot container to tear down: %v", err) @@ -191,14 +191,11 @@ func (bc *botClient) TearDownBot(ctx context.Context, containerName string, remo }, ).WithError(err).Warn("failed to destroy the bot network") } - if !removeImage { - return nil - } - if err := bc.client.RemoveImage(ctx, container.Image); err != nil { - log.WithFields(log.Fields{ - "image": container.Image, - }).WithError(err).Warn("failed to remove image of the destroyed bot container") - } + // if err := bc.client.RemoveImage(ctx, container.Image); err != nil { + // log.WithFields(log.Fields{ + // "image": container.Image, + // }).WithError(err).Warn("failed to remove image of the destroyed bot container") + // } return nil } diff --git a/services/components/containers/bot_client_test.go b/services/components/containers/bot_client_test.go index 9be1a149..82ea34c2 100644 --- a/services/components/containers/bot_client_test.go +++ b/services/components/containers/bot_client_test.go @@ -154,9 +154,8 @@ func (s *BotClientTestSuite) TestTearDownBot() { s.client.EXPECT().ShutdownContainer(gomock.Any(), testContainerID2, &timeout).Return(testErr) s.client.EXPECT().RemoveContainer(gomock.Any(), testContainerID2).Return(testErr) s.client.EXPECT().RemoveNetworkByName(gomock.Any(), botConfig.ContainerName()).Return(testErr) - s.client.EXPECT().RemoveImage(gomock.Any(), testImageRef).Return(testErr) - s.r.NoError(s.botClient.TearDownBot(context.Background(), botConfig.ContainerName(), true)) + s.r.NoError(s.botClient.TearDownBot(context.Background(), botConfig.ContainerName())) } func (s *BotClientTestSuite) TestStopBot() { diff --git a/services/components/containers/image_cleanup.go b/services/components/containers/image_cleanup.go new file mode 100644 index 00000000..e0cdf034 --- /dev/null +++ b/services/components/containers/image_cleanup.go @@ -0,0 +1,99 @@ +package containers + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/forta-network/forta-node/clients" + "github.com/forta-network/forta-node/clients/docker" + "github.com/sirupsen/logrus" +) + +const imageCleanupInterval = time.Hour * 1 + +// ImageCleanup deals with image cleanup. +type ImageCleanup interface { + Do(context.Context) error +} + +type imageCleanup struct { + client clients.DockerClient + lastCleanup time.Time + exclusionList []string +} + +// NewImageCleanup creates new. +func NewImageCleanup(client clients.DockerClient, excludeImages ...string) *imageCleanup { + return &imageCleanup{ + client: client, + exclusionList: excludeImages, + } +} + +// Do does the image cleanup by finding all unused Disco images and removing them. +// The logic executes only after an interval. +func (ic *imageCleanup) Do(ctx context.Context) error { + if time.Since(ic.lastCleanup) < imageCleanupInterval { + return nil + } + + containers, err := ic.client.GetContainers(ctx) + if err != nil { + return fmt.Errorf("failed to get containers during image cleanup: %v", err) + } + + images, err := ic.client.ListImages(ctx) + if err != nil { + return fmt.Errorf("failed to list images during image cleanup: %v", err) + } + + for _, image := range images { + logger := logrus.WithField("image", image) + + if ic.isExcluded(image) { + logger.Debug("image is excluded - skipping cleanup") + continue + } + + if ic.isImageInUse(containers, image) { + logger.Debug("image is in use - skipping cleanup") + continue + } + + if err := ic.client.RemoveImage(ctx, image); err != nil { + logger.WithError(err).Warn("failed to cleanup unused disco image") + } else { + logger.Info("successfully cleaned up unused image") + } + } + + ic.lastCleanup = time.Now() + return nil +} + +func (ic *imageCleanup) isExcluded(ref string) bool { + // needs to be a Disco image + if !strings.Contains(ref, "bafybei") { + return true + } + + for _, excluded := range ic.exclusionList { + // expecting the ref to include the excluded ref because + // we specify it and it can be a subset of the full reference + if strings.Contains(ref, excluded) { + return true + } + } + return false +} + +func (lc *imageCleanup) isImageInUse(containers docker.ContainerList, image string) bool { + for _, container := range containers { + if container.Image == image { + return true + } + } + return false +} diff --git a/services/components/containers/image_cleanup_test.go b/services/components/containers/image_cleanup_test.go new file mode 100644 index 00000000..e373f336 --- /dev/null +++ b/services/components/containers/image_cleanup_test.go @@ -0,0 +1,94 @@ +package containers + +import ( + "context" + "errors" + "testing" + "time" + + "github.com/forta-network/forta-node/clients/docker" + mock_clients "github.com/forta-network/forta-node/clients/mocks" + "github.com/golang/mock/gomock" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" +) + +const ( + testCleanupImage1 = "bafybeitestcleanupimage1" + testCleanupImage2 = "bafybeitestcleanupimage2" + testCleanupImage3 = "bafybeitestcleanupimage3" +) + +type ImageCleanupTestSuite struct { + r *require.Assertions + + client *mock_clients.MockDockerClient + + imageCleanup *imageCleanup + + suite.Suite +} + +func TestImageCleanupTestSuite(t *testing.T) { + suite.Run(t, &ImageCleanupTestSuite{}) +} + +func (s *ImageCleanupTestSuite) SetupTest() { + s.r = s.Require() + + ctrl := gomock.NewController(s.T()) + s.client = mock_clients.NewMockDockerClient(ctrl) + + s.imageCleanup = NewImageCleanup(s.client) +} + +func (s *ImageCleanupTestSuite) TestIntervalSkip() { + s.imageCleanup.lastCleanup = time.Now() // very close cleanup time + + // no calls expected + + s.r.NoError(s.imageCleanup.Do(context.Background())) +} + +func (s *ImageCleanupTestSuite) TestContainerListError() { + s.client.EXPECT().GetContainers(gomock.Any()).Return(nil, errors.New("test error")) + + s.r.Error(s.imageCleanup.Do(context.Background())) +} + +func (s *ImageCleanupTestSuite) TestImagesListError() { + s.client.EXPECT().GetContainers(gomock.Any()).Return(docker.ContainerList{}, nil) + s.client.EXPECT().ListImages(gomock.Any()).Return(nil, errors.New("test error")) + + s.r.Error(s.imageCleanup.Do(context.Background())) +} + +func (s *ImageCleanupTestSuite) TestRemoveImageError() { + initialLastCleanup := s.imageCleanup.lastCleanup + + s.client.EXPECT().GetContainers(gomock.Any()).Return(docker.ContainerList{}, nil) + s.client.EXPECT().ListImages(gomock.Any()).Return([]string{testCleanupImage1}, nil) + s.client.EXPECT().RemoveImage(gomock.Any(), testCleanupImage1).Return(errors.New("test error")) + + // no error and mutated last cleanup timestamp: removal errors do not affect this + s.r.NoError(s.imageCleanup.Do(context.Background())) + s.r.NotEqual(initialLastCleanup, s.imageCleanup.lastCleanup) +} + +func (s *ImageCleanupTestSuite) TestCleanupSuccess() { + initialLastCleanup := s.imageCleanup.lastCleanup + + s.imageCleanup.exclusionList = []string{testCleanupImage1} // excluding image + s.client.EXPECT().GetContainers(gomock.Any()).Return(docker.ContainerList{ + { + Image: testCleanupImage2, // image in use by container + }, + }, nil) + s.client.EXPECT().ListImages(gomock.Any()).Return( + []string{testCleanupImage1, testCleanupImage2, testCleanupImage3}, nil, + ) + s.client.EXPECT().RemoveImage(gomock.Any(), testCleanupImage3).Return(nil) // only removes image 3 + + s.r.NoError(s.imageCleanup.Do(context.Background())) + s.r.NotEqual(initialLastCleanup, s.imageCleanup.lastCleanup) +} diff --git a/services/components/containers/mocks/mock_bot_client.go b/services/components/containers/mocks/mock_bot_client.go index 60ae10c1..5cee2353 100644 --- a/services/components/containers/mocks/mock_bot_client.go +++ b/services/components/containers/mocks/mock_bot_client.go @@ -108,15 +108,15 @@ func (mr *MockBotClientMockRecorder) StopBot(ctx, botConfig interface{}) *gomock } // TearDownBot mocks base method. -func (m *MockBotClient) TearDownBot(ctx context.Context, containerName string, removeImage bool) error { +func (m *MockBotClient) TearDownBot(ctx context.Context, containerName string) error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "TearDownBot", ctx, containerName, removeImage) + ret := m.ctrl.Call(m, "TearDownBot", ctx, containerName) ret0, _ := ret[0].(error) return ret0 } // TearDownBot indicates an expected call of TearDownBot. -func (mr *MockBotClientMockRecorder) TearDownBot(ctx, containerName, removeImage interface{}) *gomock.Call { +func (mr *MockBotClientMockRecorder) TearDownBot(ctx, containerName interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "TearDownBot", reflect.TypeOf((*MockBotClient)(nil).TearDownBot), ctx, containerName, removeImage) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "TearDownBot", reflect.TypeOf((*MockBotClient)(nil).TearDownBot), ctx, containerName) } diff --git a/services/components/containers/mocks/mock_image_cleanup.go b/services/components/containers/mocks/mock_image_cleanup.go new file mode 100644 index 00000000..ae7dd899 --- /dev/null +++ b/services/components/containers/mocks/mock_image_cleanup.go @@ -0,0 +1,49 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: services/components/containers/image_cleanup.go + +// Package mock_containers is a generated GoMock package. +package mock_containers + +import ( + context "context" + reflect "reflect" + + gomock "github.com/golang/mock/gomock" +) + +// MockImageCleanup is a mock of ImageCleanup interface. +type MockImageCleanup struct { + ctrl *gomock.Controller + recorder *MockImageCleanupMockRecorder +} + +// MockImageCleanupMockRecorder is the mock recorder for MockImageCleanup. +type MockImageCleanupMockRecorder struct { + mock *MockImageCleanup +} + +// NewMockImageCleanup creates a new mock instance. +func NewMockImageCleanup(ctrl *gomock.Controller) *MockImageCleanup { + mock := &MockImageCleanup{ctrl: ctrl} + mock.recorder = &MockImageCleanupMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockImageCleanup) EXPECT() *MockImageCleanupMockRecorder { + return m.recorder +} + +// Do mocks base method. +func (m *MockImageCleanup) Do(arg0 context.Context) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Do", arg0) + ret0, _ := ret[0].(error) + return ret0 +} + +// Do indicates an expected call of Do. +func (mr *MockImageCleanupMockRecorder) Do(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Do", reflect.TypeOf((*MockImageCleanup)(nil).Do), arg0) +} diff --git a/services/components/lifecycle/bot_manager.go b/services/components/lifecycle/bot_manager.go index f4b047e2..fced0e2c 100644 --- a/services/components/lifecycle/bot_manager.go +++ b/services/components/lifecycle/bot_manager.go @@ -111,7 +111,7 @@ func (blm *botLifecycleManager) ManageBots(ctx context.Context) error { // then stop the containers for _, removedBotConfig := range removedBotConfigs { - if err := blm.botClient.TearDownBot(ctx, removedBotConfig.ContainerName(), true); err != nil { + if err := blm.botClient.TearDownBot(ctx, removedBotConfig.ContainerName()); err != nil { log.WithError(err).WithField("container", removedBotConfig.ContainerName()). Warn("failed to tear down unassigned bot container") blm.lifecycleMetrics.BotError("unassigned.teardown", err, removedBotConfig) @@ -188,7 +188,7 @@ func (blm *botLifecycleManager) CleanupUnusedBots(ctx context.Context) error { continue } - if err := blm.botClient.TearDownBot(ctx, botContainerName, true); err != nil { + if err := blm.botClient.TearDownBot(ctx, botContainerName); err != nil { log.WithField("botContainer", botContainerName).WithError(err). Error("error while tearing down the unused bot") } @@ -286,7 +286,7 @@ func (blm *botLifecycleManager) TearDownRunningBots(ctx context.Context) { // then stop the containers for _, runningBotConfig := range blm.runningBots { - err := blm.botClient.TearDownBot(ctx, runningBotConfig.ContainerName(), false) + err := blm.botClient.TearDownBot(ctx, runningBotConfig.ContainerName()) if err != nil { blm.lifecycleMetrics.BotError("teardown.bot", err, runningBotConfig) log.WithError(err).WithField("container", runningBotConfig.ContainerName()). diff --git a/services/components/lifecycle/bot_manager_test.go b/services/components/lifecycle/bot_manager_test.go index 426de94b..7be67fb5 100644 --- a/services/components/lifecycle/bot_manager_test.go +++ b/services/components/lifecycle/bot_manager_test.go @@ -102,8 +102,8 @@ func (s *BotLifecycleManagerTestSuite) TestAddUpdateRemove() { s.botPool.EXPECT().RemoveBotsWithConfigs(removedBots) s.lifecycleMetrics.EXPECT().StatusStopping(removedBots) - s.botContainers.EXPECT().TearDownBot(gomock.Any(), removedBots[0].ContainerName(), true) - s.botContainers.EXPECT().TearDownBot(gomock.Any(), removedBots[1].ContainerName(), true) + s.botContainers.EXPECT().TearDownBot(gomock.Any(), removedBots[0].ContainerName()) + s.botContainers.EXPECT().TearDownBot(gomock.Any(), removedBots[1].ContainerName()) s.botContainers.EXPECT().EnsureBotImages(gomock.Any(), addedBots).Return([]error{nil, nil, nil}).Times(1) s.botContainers.EXPECT().LaunchBot(gomock.Any(), addedBots[0]).Return(nil).Times(1) @@ -215,7 +215,7 @@ func (s *BotLifecycleManagerTestSuite) TestCleanup() { State: "exited", }, }, nil).Times(1) - s.botContainers.EXPECT().TearDownBot(gomock.Any(), unusedBotConfig.ContainerName(), true).Return(nil) + s.botContainers.EXPECT().TearDownBot(gomock.Any(), unusedBotConfig.ContainerName()).Return(nil) s.r.NoError(s.botManager.CleanupUnusedBots(context.Background())) } @@ -234,8 +234,8 @@ func (s *BotLifecycleManagerTestSuite) TestTearDown() { s.botManager.runningBots = botConfigs s.botPool.EXPECT().RemoveBotsWithConfigs(botConfigs) - s.botContainers.EXPECT().TearDownBot(gomock.Any(), botConfigs[0].ContainerName(), false).Return(nil) - s.botContainers.EXPECT().TearDownBot(gomock.Any(), botConfigs[1].ContainerName(), false).Return(nil) + s.botContainers.EXPECT().TearDownBot(gomock.Any(), botConfigs[0].ContainerName()).Return(nil) + s.botContainers.EXPECT().TearDownBot(gomock.Any(), botConfigs[1].ContainerName()).Return(nil) s.botManager.TearDownRunningBots(context.Background()) } diff --git a/services/components/lifecycle/lifecycle_test.go b/services/components/lifecycle/lifecycle_test.go index 36bc29f5..4e4de352 100644 --- a/services/components/lifecycle/lifecycle_test.go +++ b/services/components/lifecycle/lifecycle_test.go @@ -426,7 +426,7 @@ func (s *LifecycleTestSuite) TestUnassigned() { // and should shortly be torn down s.lifecycleMetrics.EXPECT().StatusStopping(assigned[0]) - s.botContainers.EXPECT().TearDownBot(gomock.Any(), assigned[0].ContainerName(), true).Return(nil) + s.botContainers.EXPECT().TearDownBot(gomock.Any(), assigned[0].ContainerName()).Return(nil) s.lifecycleMetrics.EXPECT().StatusRunning().Times(1) s.lifecycleMetrics.EXPECT().ClientClose(assigned[0]) s.botGrpc.EXPECT().Close().AnyTimes() diff --git a/services/supervisor/bots.go b/services/supervisor/bots.go index 256ce6ae..dd14ef7a 100644 --- a/services/supervisor/bots.go +++ b/services/supervisor/bots.go @@ -29,6 +29,9 @@ func (sup *SupervisorService) doRefreshBotContainers() { if err := sup.botLifecycle.BotManager.CleanupUnusedBots(sup.ctx); err != nil { log.WithError(err).Error("error while cleaning up unused bots") } + if err := sup.botLifecycle.ImageCleanup.Do(sup.ctx); err != nil { + log.WithError(err).Error("error while image cleanup") + } if err := sup.botLifecycle.BotManager.RestartExitedBots(sup.ctx); err != nil { log.WithError(err).Error("error while restarting exited bots") } diff --git a/services/supervisor/bots_test.go b/services/supervisor/bots_test.go index 150f8534..7ddaeee6 100644 --- a/services/supervisor/bots_test.go +++ b/services/supervisor/bots_test.go @@ -4,6 +4,7 @@ import ( "errors" "testing" + mock_containers "github.com/forta-network/forta-node/services/components/containers/mocks" mock_lifecycle "github.com/forta-network/forta-node/services/components/lifecycle/mocks" "github.com/golang/mock/gomock" ) @@ -11,8 +12,11 @@ import ( func TestBotManagement(t *testing.T) { supervisor := &SupervisorService{} - botManager := mock_lifecycle.NewMockBotLifecycleManager(gomock.NewController(t)) + ctrl := gomock.NewController(t) + botManager := mock_lifecycle.NewMockBotLifecycleManager(ctrl) + imageCleanup := mock_containers.NewMockImageCleanup(ctrl) supervisor.botLifecycle.BotManager = botManager + supervisor.botLifecycle.ImageCleanup = imageCleanup testErr := errors.New("test error - ignore") @@ -20,10 +24,12 @@ func TestBotManagement(t *testing.T) { gomock.InOrder( botManager.EXPECT().ManageBots(gomock.Any()).Return(testErr), botManager.EXPECT().CleanupUnusedBots(gomock.Any()).Return(testErr), + imageCleanup.EXPECT().Do(gomock.Any()).Return(testErr), botManager.EXPECT().RestartExitedBots(gomock.Any()).Return(testErr), botManager.EXPECT().ExitInactiveBots(gomock.Any()).Return(testErr), botManager.EXPECT().ManageBots(gomock.Any()).Return(testErr), botManager.EXPECT().CleanupUnusedBots(gomock.Any()).Return(testErr), + imageCleanup.EXPECT().Do(gomock.Any()).Return(testErr), botManager.EXPECT().RestartExitedBots(gomock.Any()).Return(testErr), botManager.EXPECT().ExitInactiveBots(gomock.Any()).Return(testErr), ) From a41f794d4b587c1fd2e7d786489cbcfd2ab53dce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Caner=20=C3=87=C4=B1dam?= Date: Sun, 20 Aug 2023 00:55:15 +0200 Subject: [PATCH 2/8] remove commented out code --- services/components/containers/bot_client.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/services/components/containers/bot_client.go b/services/components/containers/bot_client.go index c46b0a07..e370f611 100644 --- a/services/components/containers/bot_client.go +++ b/services/components/containers/bot_client.go @@ -191,11 +191,6 @@ func (bc *botClient) TearDownBot(ctx context.Context, containerName string) erro }, ).WithError(err).Warn("failed to destroy the bot network") } - // if err := bc.client.RemoveImage(ctx, container.Image); err != nil { - // log.WithFields(log.Fields{ - // "image": container.Image, - // }).WithError(err).Warn("failed to remove image of the destroyed bot container") - // } return nil } From e6ecde16387f1296e7f4f0a66fdc1faf34f4013a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Caner=20=C3=87=C4=B1dam?= Date: Mon, 21 Aug 2023 16:43:18 +0200 Subject: [PATCH 3/8] remove using repo digests --- clients/docker/client.go | 1 + 1 file changed, 1 insertion(+) diff --git a/clients/docker/client.go b/clients/docker/client.go index 35742301..85807bcc 100644 --- a/clients/docker/client.go +++ b/clients/docker/client.go @@ -794,6 +794,7 @@ func (d *dockerClient) ListImages(ctx context.Context) (imgs []string, err error } for _, imgSummary := range imgSummaries { imgs = append(imgs, imgSummary.RepoTags...) + imgs = append(imgs, imgSummary.RepoDigests...) } return } From c9faf3c072783b71e8a0616611eaa40e64212f0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Caner=20=C3=87=C4=B1dam?= Date: Mon, 21 Aug 2023 17:20:35 +0200 Subject: [PATCH 4/8] prefer the image identifier in the digest => tag => id order --- clients/docker/client.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/clients/docker/client.go b/clients/docker/client.go index 85807bcc..602751f5 100644 --- a/clients/docker/client.go +++ b/clients/docker/client.go @@ -793,8 +793,15 @@ func (d *dockerClient) ListImages(ctx context.Context) (imgs []string, err error return nil, fmt.Errorf("failed to list locally available images: %v", err) } for _, imgSummary := range imgSummaries { - imgs = append(imgs, imgSummary.RepoTags...) - imgs = append(imgs, imgSummary.RepoDigests...) + if len(imgSummary.RepoDigests) > 0 { + imgs = append(imgs, imgSummary.RepoDigests[0]) + continue + } + if len(imgSummary.RepoTags) > 0 { + imgs = append(imgs, imgSummary.RepoTags[0]) + continue + } + imgs = append(imgs, imgSummary.ID) } return } From 29ffe09cca4199296c69d1bf6ee064cad14023ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Caner=20=C3=87=C4=B1dam?= Date: Tue, 22 Aug 2023 12:17:44 +0200 Subject: [PATCH 5/8] make heartbeat image ref dynamic --- config/agents.go | 5 ++-- services/components/components.go | 2 +- .../components/containers/image_cleanup.go | 12 ++++++-- .../containers/image_cleanup_test.go | 30 +++++++++++++++---- 4 files changed, 37 insertions(+), 12 deletions(-) diff --git a/config/agents.go b/config/agents.go index af6e34c4..3cfe7250 100644 --- a/config/agents.go +++ b/config/agents.go @@ -10,9 +10,8 @@ import ( ) const ( - AgentGrpcPort = "50051" - HeartbeatBotID = "0x37ce3df7facea4bd95619655347ffd66f50909d100fa74ae042f122a8b024c29" - HeartbeatBotImage = "bafybeicrbhj7ekrl6njozgpncl3jz4y64imxuilomkmaolztbyat5dtxri" + AgentGrpcPort = "50051" + HeartbeatBotID = "0x37ce3df7facea4bd95619655347ffd66f50909d100fa74ae042f122a8b024c29" ) type AgentConfig struct { diff --git a/services/components/components.go b/services/components/components.go index 018df3a8..d43704de 100644 --- a/services/components/components.go +++ b/services/components/components.go @@ -119,7 +119,7 @@ func GetBotLifecycleComponents(ctx context.Context, botLifeConfig BotLifecycleCo botLifeConfig.BotRegistry, botClient, lifecycleMediator, lifecycleMetrics, botMonitor, ) - imageCleanup := containers.NewImageCleanup(dockerClient, config.HeartbeatBotImage) + imageCleanup := containers.NewImageCleanup(dockerClient, botLifeConfig.BotRegistry) return BotLifecycle{ BotManager: botManager, diff --git a/services/components/containers/image_cleanup.go b/services/components/containers/image_cleanup.go index e0cdf034..d08b290c 100644 --- a/services/components/containers/image_cleanup.go +++ b/services/components/containers/image_cleanup.go @@ -8,6 +8,7 @@ import ( "github.com/forta-network/forta-node/clients" "github.com/forta-network/forta-node/clients/docker" + "github.com/forta-network/forta-node/services/components/registry" "github.com/sirupsen/logrus" ) @@ -20,14 +21,16 @@ type ImageCleanup interface { type imageCleanup struct { client clients.DockerClient + botRegistry registry.BotRegistry lastCleanup time.Time exclusionList []string } // NewImageCleanup creates new. -func NewImageCleanup(client clients.DockerClient, excludeImages ...string) *imageCleanup { +func NewImageCleanup(client clients.DockerClient, botRegistry registry.BotRegistry, excludeImages ...string) *imageCleanup { return &imageCleanup{ client: client, + botRegistry: botRegistry, exclusionList: excludeImages, } } @@ -49,10 +52,15 @@ func (ic *imageCleanup) Do(ctx context.Context) error { return fmt.Errorf("failed to list images during image cleanup: %v", err) } + heartbeatBot, err := ic.botRegistry.LoadHeartbeatBot() + if err != nil { + return fmt.Errorf("failed to load the heartbeat bot during cleanup: %v", err) + } + for _, image := range images { logger := logrus.WithField("image", image) - if ic.isExcluded(image) { + if ic.isExcluded(image) || image == heartbeatBot.Image { logger.Debug("image is excluded - skipping cleanup") continue } diff --git a/services/components/containers/image_cleanup_test.go b/services/components/containers/image_cleanup_test.go index e373f336..76eeae54 100644 --- a/services/components/containers/image_cleanup_test.go +++ b/services/components/containers/image_cleanup_test.go @@ -8,21 +8,26 @@ import ( "github.com/forta-network/forta-node/clients/docker" mock_clients "github.com/forta-network/forta-node/clients/mocks" + "github.com/forta-network/forta-node/config" + mock_registry "github.com/forta-network/forta-node/services/components/registry/mocks" "github.com/golang/mock/gomock" + "github.com/sirupsen/logrus" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" ) const ( - testCleanupImage1 = "bafybeitestcleanupimage1" - testCleanupImage2 = "bafybeitestcleanupimage2" - testCleanupImage3 = "bafybeitestcleanupimage3" + testCleanupImage1 = "bafybei-testcleanupimage1" + testCleanupImage2 = "bafybei-testcleanupimage2" + testCleanupImage3 = "bafybei-testcleanupimage3" + testHeartbeatBotImage = "bafybei-heartbeatbot" ) type ImageCleanupTestSuite struct { r *require.Assertions - client *mock_clients.MockDockerClient + client *mock_clients.MockDockerClient + botRegistry *mock_registry.MockBotRegistry imageCleanup *imageCleanup @@ -34,12 +39,15 @@ func TestImageCleanupTestSuite(t *testing.T) { } func (s *ImageCleanupTestSuite) SetupTest() { + logrus.SetLevel(logrus.DebugLevel) + s.r = s.Require() ctrl := gomock.NewController(s.T()) s.client = mock_clients.NewMockDockerClient(ctrl) + s.botRegistry = mock_registry.NewMockBotRegistry(ctrl) - s.imageCleanup = NewImageCleanup(s.client) + s.imageCleanup = NewImageCleanup(s.client, s.botRegistry) } func (s *ImageCleanupTestSuite) TestIntervalSkip() { @@ -63,11 +71,20 @@ func (s *ImageCleanupTestSuite) TestImagesListError() { s.r.Error(s.imageCleanup.Do(context.Background())) } +func (s *ImageCleanupTestSuite) TestHeartbeatBotError() { + s.client.EXPECT().GetContainers(gomock.Any()).Return(docker.ContainerList{}, nil) + s.client.EXPECT().ListImages(gomock.Any()).Return([]string{testCleanupImage1}, nil) + s.botRegistry.EXPECT().LoadHeartbeatBot().Return(nil, errors.New("test error")) + + s.r.Error(s.imageCleanup.Do(context.Background())) +} + func (s *ImageCleanupTestSuite) TestRemoveImageError() { initialLastCleanup := s.imageCleanup.lastCleanup s.client.EXPECT().GetContainers(gomock.Any()).Return(docker.ContainerList{}, nil) s.client.EXPECT().ListImages(gomock.Any()).Return([]string{testCleanupImage1}, nil) + s.botRegistry.EXPECT().LoadHeartbeatBot().Return(&config.AgentConfig{Image: testHeartbeatBotImage}, nil) s.client.EXPECT().RemoveImage(gomock.Any(), testCleanupImage1).Return(errors.New("test error")) // no error and mutated last cleanup timestamp: removal errors do not affect this @@ -85,8 +102,9 @@ func (s *ImageCleanupTestSuite) TestCleanupSuccess() { }, }, nil) s.client.EXPECT().ListImages(gomock.Any()).Return( - []string{testCleanupImage1, testCleanupImage2, testCleanupImage3}, nil, + []string{testCleanupImage1, testCleanupImage2, testCleanupImage3, testHeartbeatBotImage}, nil, ) + s.botRegistry.EXPECT().LoadHeartbeatBot().Return(&config.AgentConfig{Image: testHeartbeatBotImage}, nil) s.client.EXPECT().RemoveImage(gomock.Any(), testCleanupImage3).Return(nil) // only removes image 3 s.r.NoError(s.imageCleanup.Do(context.Background())) From c419585cefd884825bfcf9f09fc605f9020d22da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Caner=20=C3=87=C4=B1dam?= Date: Tue, 22 Aug 2023 13:47:40 +0200 Subject: [PATCH 6/8] add log line --- services/components/containers/image_cleanup.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/services/components/containers/image_cleanup.go b/services/components/containers/image_cleanup.go index d08b290c..ced4e2b6 100644 --- a/services/components/containers/image_cleanup.go +++ b/services/components/containers/image_cleanup.go @@ -57,6 +57,8 @@ func (ic *imageCleanup) Do(ctx context.Context) error { return fmt.Errorf("failed to load the heartbeat bot during cleanup: %v", err) } + logrus.WithField("image", heartbeatBot.Image).Debug("cleanup: loaded heartbeat bot image reference") + for _, image := range images { logger := logrus.WithField("image", image) From b45eb105bbcc10245e9a816bdf7d18338427a2af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Caner=20=C3=87=C4=B1dam?= Date: Tue, 22 Aug 2023 14:33:56 +0200 Subject: [PATCH 7/8] work with digest references only --- clients/docker/client.go | 14 +++----------- clients/interfaces.go | 2 +- clients/mocks/mock_clients.go | 12 ++++++------ services/components/containers/image_cleanup.go | 2 +- .../components/containers/image_cleanup_test.go | 8 ++++---- 5 files changed, 15 insertions(+), 23 deletions(-) diff --git a/clients/docker/client.go b/clients/docker/client.go index 602751f5..cda73b6f 100644 --- a/clients/docker/client.go +++ b/clients/docker/client.go @@ -786,22 +786,14 @@ func (d *dockerClient) EnsureLocalImages(ctx context.Context, timeoutPerPull tim return } -// ListImages lists all images. -func (d *dockerClient) ListImages(ctx context.Context) (imgs []string, err error) { +// ListDigestReferences lists all digest references of all images. +func (d *dockerClient) ListDigestReferences(ctx context.Context) (imgs []string, err error) { imgSummaries, err := d.cli.ImageList(ctx, types.ImageListOptions{}) if err != nil { return nil, fmt.Errorf("failed to list locally available images: %v", err) } for _, imgSummary := range imgSummaries { - if len(imgSummary.RepoDigests) > 0 { - imgs = append(imgs, imgSummary.RepoDigests[0]) - continue - } - if len(imgSummary.RepoTags) > 0 { - imgs = append(imgs, imgSummary.RepoTags[0]) - continue - } - imgs = append(imgs, imgSummary.ID) + imgs = append(imgs, imgSummary.RepoDigests...) } return } diff --git a/clients/interfaces.go b/clients/interfaces.go index 6cf03ac9..ccc5e7ff 100644 --- a/clients/interfaces.go +++ b/clients/interfaces.go @@ -43,7 +43,7 @@ type DockerClient interface { HasLocalImage(ctx context.Context, ref string) (bool, error) EnsureLocalImage(ctx context.Context, name, ref string) error EnsureLocalImages(ctx context.Context, timeoutPerPull time.Duration, imagePulls []docker.ImagePull) []error - ListImages(ctx context.Context) ([]string, error) + ListDigestReferences(ctx context.Context) ([]string, error) GetContainerLogs(ctx context.Context, containerID, tail string, truncate int) (string, error) GetContainerFromRemoteAddr(ctx context.Context, hostPort string) (*types.Container, error) SetImagePullCooldown(threshold int, cooldownDuration time.Duration) diff --git a/clients/mocks/mock_clients.go b/clients/mocks/mock_clients.go index 225d3c8b..1b37422c 100644 --- a/clients/mocks/mock_clients.go +++ b/clients/mocks/mock_clients.go @@ -275,19 +275,19 @@ func (mr *MockDockerClientMockRecorder) InterruptContainer(ctx, id interface{}) return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InterruptContainer", reflect.TypeOf((*MockDockerClient)(nil).InterruptContainer), ctx, id) } -// ListImages mocks base method. -func (m *MockDockerClient) ListImages(ctx context.Context) ([]string, error) { +// ListDigestReferences mocks base method. +func (m *MockDockerClient) ListDigestReferences(ctx context.Context) ([]string, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "ListImages", ctx) + ret := m.ctrl.Call(m, "ListDigestReferences", ctx) ret0, _ := ret[0].([]string) ret1, _ := ret[1].(error) return ret0, ret1 } -// ListImages indicates an expected call of ListImages. -func (mr *MockDockerClientMockRecorder) ListImages(ctx interface{}) *gomock.Call { +// ListDigestReferences indicates an expected call of ListDigestReferences. +func (mr *MockDockerClientMockRecorder) ListDigestReferences(ctx interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListImages", reflect.TypeOf((*MockDockerClient)(nil).ListImages), ctx) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListDigestReferences", reflect.TypeOf((*MockDockerClient)(nil).ListDigestReferences), ctx) } // Nuke mocks base method. diff --git a/services/components/containers/image_cleanup.go b/services/components/containers/image_cleanup.go index ced4e2b6..afd69e6b 100644 --- a/services/components/containers/image_cleanup.go +++ b/services/components/containers/image_cleanup.go @@ -47,7 +47,7 @@ func (ic *imageCleanup) Do(ctx context.Context) error { return fmt.Errorf("failed to get containers during image cleanup: %v", err) } - images, err := ic.client.ListImages(ctx) + images, err := ic.client.ListDigestReferences(ctx) if err != nil { return fmt.Errorf("failed to list images during image cleanup: %v", err) } diff --git a/services/components/containers/image_cleanup_test.go b/services/components/containers/image_cleanup_test.go index 76eeae54..2b7ec197 100644 --- a/services/components/containers/image_cleanup_test.go +++ b/services/components/containers/image_cleanup_test.go @@ -66,14 +66,14 @@ func (s *ImageCleanupTestSuite) TestContainerListError() { func (s *ImageCleanupTestSuite) TestImagesListError() { s.client.EXPECT().GetContainers(gomock.Any()).Return(docker.ContainerList{}, nil) - s.client.EXPECT().ListImages(gomock.Any()).Return(nil, errors.New("test error")) + s.client.EXPECT().ListDigestReferences(gomock.Any()).Return(nil, errors.New("test error")) s.r.Error(s.imageCleanup.Do(context.Background())) } func (s *ImageCleanupTestSuite) TestHeartbeatBotError() { s.client.EXPECT().GetContainers(gomock.Any()).Return(docker.ContainerList{}, nil) - s.client.EXPECT().ListImages(gomock.Any()).Return([]string{testCleanupImage1}, nil) + s.client.EXPECT().ListDigestReferences(gomock.Any()).Return([]string{testCleanupImage1}, nil) s.botRegistry.EXPECT().LoadHeartbeatBot().Return(nil, errors.New("test error")) s.r.Error(s.imageCleanup.Do(context.Background())) @@ -83,7 +83,7 @@ func (s *ImageCleanupTestSuite) TestRemoveImageError() { initialLastCleanup := s.imageCleanup.lastCleanup s.client.EXPECT().GetContainers(gomock.Any()).Return(docker.ContainerList{}, nil) - s.client.EXPECT().ListImages(gomock.Any()).Return([]string{testCleanupImage1}, nil) + s.client.EXPECT().ListDigestReferences(gomock.Any()).Return([]string{testCleanupImage1}, nil) s.botRegistry.EXPECT().LoadHeartbeatBot().Return(&config.AgentConfig{Image: testHeartbeatBotImage}, nil) s.client.EXPECT().RemoveImage(gomock.Any(), testCleanupImage1).Return(errors.New("test error")) @@ -101,7 +101,7 @@ func (s *ImageCleanupTestSuite) TestCleanupSuccess() { Image: testCleanupImage2, // image in use by container }, }, nil) - s.client.EXPECT().ListImages(gomock.Any()).Return( + s.client.EXPECT().ListDigestReferences(gomock.Any()).Return( []string{testCleanupImage1, testCleanupImage2, testCleanupImage3, testHeartbeatBotImage}, nil, ) s.botRegistry.EXPECT().LoadHeartbeatBot().Return(&config.AgentConfig{Image: testHeartbeatBotImage}, nil) From ca387b7274615e438c42838230e7dc4bf9d9e612 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Caner=20=C3=87=C4=B1dam?= Date: Tue, 22 Aug 2023 14:35:11 +0200 Subject: [PATCH 8/8] add comment to explain the reason --- services/components/containers/image_cleanup.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/services/components/containers/image_cleanup.go b/services/components/containers/image_cleanup.go index afd69e6b..c028c5d2 100644 --- a/services/components/containers/image_cleanup.go +++ b/services/components/containers/image_cleanup.go @@ -47,6 +47,8 @@ func (ic *imageCleanup) Do(ctx context.Context) error { return fmt.Errorf("failed to get containers during image cleanup: %v", err) } + // we list the digest references as the main references for all images + // because we pull by digest references images, err := ic.client.ListDigestReferences(ctx) if err != nil { return fmt.Errorf("failed to list images during image cleanup: %v", err)