Skip to content

Commit

Permalink
[MGMT-14453](https://issues.redhat.com//browse/MGMT-14453): Fix bugs …
Browse files Browse the repository at this point in the history
…in the installer cache

This PR resolves multiple bugs in the installer cache. Given the poor condition of the current cache, it makes sense to fix them all in a single PR.

* https://issues.redhat.com/browse/MGMT-14452
Installer cache removes in-use cached image when out of space
* https://issues.redhat.com/browse/MGMT-14453
A small INSTALLER_CACHE_CAPACITY value causes assisted-service to crash
* https://issues.redhat.com/browse/MGMT-14457
Installer cache - fails to install when running in parallel with the same version
* Additionally, the cache did not respect limits, so this has been addressed here.

Fixes:

I have implemented fixes for each of the following issues.

* The mutex was ineffective because it was not instantiated correctly, leading to [MGMT-14452](https://issues.redhat.com//browse/MGMT-14452), [MGMT-14453](https://issues.redhat.com//browse/MGMT-14453).
* Naming convention for hardlinks changed to be UUID based to resolve [MGMT-14457](https://issues.redhat.com//browse/MGMT-14457).
* Any time we either extract or use a release, the modified time must be updated, not only for cached releases. This was causing premature pruning of hardlinks.
* LRU cache order updated to be based on microseconds instead of seconds.
* Eviction timestamp checks updated to consider max release size and also cache threshold.
* We now check there is enough space before writing.
* During eviction - releases without hard links will be evicted before releases with hard links.
  • Loading branch information
paul-maidment committed Jan 30, 2025
1 parent ef9f0c8 commit eef604c
Show file tree
Hide file tree
Showing 9 changed files with 747 additions and 198 deletions.
30 changes: 28 additions & 2 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"time"

"github.com/NYTimes/gziphandler"
"github.com/alecthomas/units"
"github.com/go-openapi/runtime"
"github.com/go-openapi/strfmt"
"github.com/go-openapi/swag"
Expand All @@ -39,6 +40,7 @@ import (
"github.com/openshift/assisted-service/internal/ignition"
"github.com/openshift/assisted-service/internal/infraenv"
installcfg "github.com/openshift/assisted-service/internal/installcfg/builder"
"github.com/openshift/assisted-service/internal/installercache"
internaljson "github.com/openshift/assisted-service/internal/json"
"github.com/openshift/assisted-service/internal/manifests"
"github.com/openshift/assisted-service/internal/metrics"
Expand Down Expand Up @@ -172,6 +174,18 @@ var Options struct {

// EnableXattrFallback is a boolean flag to enable an emulated fallback method of xattr on systems that do not support xattr.
EnableXattrFallback bool `envconfig:"ENABLE_XATTR_FALLBACK" default:"true"`

// InstallerCacheCapacityGiB is the capacity of the installer cache in GiB
InstallerCacheCapacityGiB uint `envconfig:"INSTALLER_CACHE_CAPACITY_GIB" default:"0"`

// InstallerCacheMaxReleaseSizeGiB is the expected maximum size of a single release in GiB
InstallerCacheMaxReleaseSizeGiB uint `envconfig:"INSTALLER_CACHE_MAX_RELEASE_SIZE_GIB" default:"2"`

// InstallerCacheEvictionThresholdPercent is the percentage of capacity at which the cache will start to evict releases.
InstallerCacheEvictionThresholdPercent uint `envconfig:"INSTALLER_CACHE_EVICTION_THRESHOLD_PERCENT" default:"80"`

// ReleaseFetchRetryIntervalSeconds is the number of seconds that the cache should wait before retrying the fetch of a release if unable to do so for capacity reasons.
ReleaseFetchRetryIntervalSeconds uint `envconfig:"INSTALLER_CACHE_RELEASE_FETCH_RETRY_INTERVAL_SECONDS" default:"30"`
}

func InitLogs(logLevel, logFormat string) *logrus.Logger {
Expand Down Expand Up @@ -315,7 +329,8 @@ func main() {
metricsManagerConfig := &metrics.MetricsManagerConfig{
DirectoryUsageMonitorConfig: metrics.DirectoryUsageMonitorConfig{
Directories: []string{Options.WorkDir}}}
metricsManager := metrics.NewMetricsManager(prometheusRegistry, eventsHandler, metrics.NewOSDiskStatsHelper(), metricsManagerConfig, log)
diskStatsHelper := metrics.NewOSDiskStatsHelper(logrus.New())
metricsManager := metrics.NewMetricsManager(prometheusRegistry, eventsHandler, diskStatsHelper, metricsManagerConfig, log)
if ocmClient != nil {
//inject the metric server to the ocm client for purpose of
//performance monitoring the calls to ACM. This could not be done
Expand Down Expand Up @@ -488,7 +503,18 @@ func main() {
failOnError(err, "failed to create valid bm config S3 endpoint URL from %s", Options.BMConfig.S3EndpointURL)
Options.BMConfig.S3EndpointURL = newUrl

generator := generator.New(log, objectHandler, Options.GeneratorConfig, Options.WorkDir, providerRegistry, manifestsApi, eventsHandler)
installGeneratorDirectoryConfig := generator.InstallGeneratorDirectoryConfig{WorkDir: Options.WorkDir}
installerCacheConfig := installercache.InstallerCacheConfig{
CacheDir: filepath.Join(installGeneratorDirectoryConfig.GetWorkingDirectory(), "installercache"),
MaxCapacity: int64(Options.InstallerCacheCapacityGiB) * int64(units.GiB),
MaxReleaseSize: int64(Options.InstallerCacheMaxReleaseSizeGiB) * int64(units.GiB),
ReleaseFetchRetryInterval: time.Duration(Options.ReleaseFetchRetryIntervalSeconds) * time.Second,
InstallerCacheEvictionThreshold: float64(Options.InstallerCacheEvictionThresholdPercent) / 100,
}
installerCache, err := installercache.New(installerCacheConfig, eventsHandler, diskStatsHelper, log)
failOnError(err, "failed to instantiate installercache")

generator := generator.New(installGeneratorDirectoryConfig, log, objectHandler, Options.GeneratorConfig, providerRegistry, manifestsApi, eventsHandler, installerCache)
var crdUtils bminventory.CRDUtils
if ctrlMgr != nil {
crdUtils = controllers.NewCRDUtils(ctrlMgr.GetClient(), hostApi)
Expand Down
8 changes: 3 additions & 5 deletions internal/ignition/installmanifests.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ type installerGenerator struct {
cluster *common.Cluster
releaseImage string
releaseImageMirror string
installerDir string
serviceCACert string
encodedDhcpFileContents string
s3Client s3wrapper.API
Expand All @@ -110,24 +109,23 @@ var fileNames = [...]string{
}

// NewGenerator returns a generator that can generate ignition files
func NewGenerator(workDir string, installerDir string, cluster *common.Cluster, releaseImage string, releaseImageMirror string,
func NewGenerator(workDir string, cluster *common.Cluster, releaseImage string, releaseImageMirror string,
serviceCACert string, installInvoker string, s3Client s3wrapper.API, log logrus.FieldLogger, providerRegistry registry.ProviderRegistry,
installerReleaseImageOverride, clusterTLSCertOverrideDir string, storageCapacityLimit int64, manifestApi manifestsapi.ManifestsAPI, eventsHandler eventsapi.Handler) Generator {
installerReleaseImageOverride, clusterTLSCertOverrideDir string, manifestApi manifestsapi.ManifestsAPI, eventsHandler eventsapi.Handler, installerCache *installercache.Installers) Generator {
return &installerGenerator{
cluster: cluster,
log: log,
releaseImage: releaseImage,
releaseImageMirror: releaseImageMirror,
workDir: workDir,
installerDir: installerDir,
serviceCACert: serviceCACert,
s3Client: s3Client,
enableMetal3Provisioning: true,
installInvoker: installInvoker,
providerRegistry: providerRegistry,
installerReleaseImageOverride: installerReleaseImageOverride,
clusterTLSCertOverrideDir: clusterTLSCertOverrideDir,
installerCache: installercache.New(installerDir, storageCapacityLimit, eventsHandler, log),
installerCache: installerCache,
manifestApi: manifestApi,
}
}
Expand Down
Loading

0 comments on commit eef604c

Please sign in to comment.