diff --git a/api/v1/storagecluster_types.go b/api/v1/storagecluster_types.go index 7f0a34545e..b6753b5941 100644 --- a/api/v1/storagecluster_types.go +++ b/api/v1/storagecluster_types.go @@ -216,6 +216,9 @@ type ManageCephCluster struct { // Whether to allow updating the device class after the OSD is initially provisioned AllowDeviceClassUpdate bool `json:"allowDeviceClassUpdate,omitempty"` + + // CephClusterHealthCheckSpec represent the healthcheck for Ceph daemons + HealthCheck *rookCephv1.CephClusterHealthCheckSpec `json:"healthCheck,omitempty"` } // ManageCephConfig defines how to reconcile the Ceph configuration diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 25007bf27e..2fa69d6729 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -294,6 +294,11 @@ func (in *ManageCephCluster) DeepCopyInto(out *ManageCephCluster) { *out = new(float64) **out = **in } + if in.HealthCheck != nil { + in, out := &in.HealthCheck, &out.HealthCheck + *out = new(ceph_rook_iov1.CephClusterHealthCheckSpec) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ManageCephCluster. diff --git a/config/crd/bases/ocs.openshift.io_storageclusters.yaml b/config/crd/bases/ocs.openshift.io_storageclusters.yaml index 85d94e7a06..b0f35f513c 100644 --- a/config/crd/bases/ocs.openshift.io_storageclusters.yaml +++ b/config/crd/bases/ocs.openshift.io_storageclusters.yaml @@ -766,6 +766,402 @@ spec: minimum: 0 nullable: true type: number + healthCheck: + description: CephClusterHealthCheckSpec represent the healthcheck + for Ceph daemons + properties: + daemonHealth: + description: DaemonHealth is the health check for a given + daemon + nullable: true + properties: + mon: + description: Monitor represents the health check settings + for the Ceph monitor + nullable: true + properties: + disabled: + type: boolean + interval: + description: Interval is the internal in second + or minute for the health check to run like 60s + for 60 seconds + type: string + timeout: + type: string + type: object + osd: + description: ObjectStorageDaemon represents the health + check settings for the Ceph OSDs + nullable: true + properties: + disabled: + type: boolean + interval: + description: Interval is the internal in second + or minute for the health check to run like 60s + for 60 seconds + type: string + timeout: + type: string + type: object + status: + description: Status represents the health check settings + for the Ceph health + nullable: true + properties: + disabled: + type: boolean + interval: + description: Interval is the internal in second + or minute for the health check to run like 60s + for 60 seconds + type: string + timeout: + type: string + type: object + type: object + livenessProbe: + additionalProperties: + description: ProbeSpec is a wrapper around Probe so + it can be enabled or disabled for a Ceph daemon + properties: + disabled: + description: Disabled determines whether probe is + disable or not + type: boolean + probe: + description: |- + Probe describes a health check to be performed against a container to determine whether it is + alive or ready to receive traffic. + properties: + exec: + description: Exec specifies a command to execute + in the container. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies a GRPC HealthCheckRequest. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies an HTTP GET request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the + request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies a connection + to a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + type: object + description: LivenessProbe allows changing the livenessProbe + configuration for a given daemon + type: object + startupProbe: + additionalProperties: + description: ProbeSpec is a wrapper around Probe so + it can be enabled or disabled for a Ceph daemon + properties: + disabled: + description: Disabled determines whether probe is + disable or not + type: boolean + probe: + description: |- + Probe describes a health check to be performed against a container to determine whether it is + alive or ready to receive traffic. + properties: + exec: + description: Exec specifies a command to execute + in the container. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies a GRPC HealthCheckRequest. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies an HTTP GET request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the + request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies a connection + to a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + type: object + description: StartupProbe allows changing the startupProbe + configuration for a given daemon + type: object + type: object mgrCount: enum: - 1 diff --git a/controllers/storagecluster/cephcluster.go b/controllers/storagecluster/cephcluster.go index 1cfaad2717..f9a2c21c17 100644 --- a/controllers/storagecluster/cephcluster.go +++ b/controllers/storagecluster/cephcluster.go @@ -503,6 +503,12 @@ func newCephCluster(sc *ocsv1.StorageCluster, cephImage string, kmsConfigMap *co cephCluster.Spec.DisruptionManagement.OSDMaintenanceTimeout = sc.Spec.ManagedResources.CephCluster.OsdMaintenanceTimeout } + if sc.Spec.ManagedResources.CephCluster.HealthCheck != nil { + cephCluster.Spec.HealthCheck = *sc.Spec.ManagedResources.CephCluster.HealthCheck + } else { + cephCluster.Spec.HealthCheck = rookCephv1.CephClusterHealthCheckSpec{} + } + if sc.Spec.LogCollector != nil { if sc.Spec.LogCollector.Periodicity != "" { cephCluster.Spec.LogCollector.Periodicity = sc.Spec.LogCollector.Periodicity diff --git a/controllers/storagecluster/cephcluster_test.go b/controllers/storagecluster/cephcluster_test.go index 3833cb9913..a64a9021ec 100644 --- a/controllers/storagecluster/cephcluster_test.go +++ b/controllers/storagecluster/cephcluster_test.go @@ -1539,6 +1539,77 @@ func TestEnsureUpgradeReliabilityParams(t *testing.T) { assert.Equal(t, 45*time.Minute, expected.Spec.DisruptionManagement.OSDMaintenanceTimeout) } +func TestHealthCheckConfiguration(t *testing.T) { + sc := &ocsv1.StorageCluster{} + mockStorageCluster.DeepCopyInto(sc) + interval := metav1.Duration{ + Duration: 20 * time.Second, + } + mockProbeSpec := &rookCephv1.ProbeSpec{ + Disabled: false, + Probe: &corev1.Probe{ + InitialDelaySeconds: 10, + TimeoutSeconds: 5, + }, + } + probeMap := make(map[rookCephv1.KeyType]*rookCephv1.ProbeSpec) + probeMap["abc"] = mockProbeSpec + + sc.Spec.ManagedResources.CephCluster.HealthCheck = &rookCephv1.CephClusterHealthCheckSpec{ + DaemonHealth: rookCephv1.DaemonHealthSpec{ + Status: rookCephv1.HealthCheckSpec{ + Timeout: "11", + Disabled: false, + Interval: &interval, + }, + Monitor: rookCephv1.HealthCheckSpec{ + Timeout: "22", + Disabled: true, + Interval: &interval, + }, + ObjectStorageDaemon: rookCephv1.HealthCheckSpec{ + Timeout: "33", + Disabled: false, + Interval: &interval, + }, + }, + StartupProbe: probeMap, + LivenessProbe: probeMap, + } + expected := newCephCluster(sc, "", nil, log) + + assert.Equal(t, "11", expected.Spec.HealthCheck.DaemonHealth.Status.Timeout) + assert.Equal(t, false, expected.Spec.HealthCheck.DaemonHealth.Status.Disabled) + assert.Equal(t, &interval, expected.Spec.HealthCheck.DaemonHealth.Status.Interval) + + assert.Equal(t, "22", expected.Spec.HealthCheck.DaemonHealth.Monitor.Timeout) + assert.Equal(t, true, expected.Spec.HealthCheck.DaemonHealth.Monitor.Disabled) + assert.Equal(t, &interval, expected.Spec.HealthCheck.DaemonHealth.Monitor.Interval) + + assert.Equal(t, "33", expected.Spec.HealthCheck.DaemonHealth.ObjectStorageDaemon.Timeout) + assert.Equal(t, false, expected.Spec.HealthCheck.DaemonHealth.ObjectStorageDaemon.Disabled) + assert.Equal(t, &interval, expected.Spec.HealthCheck.DaemonHealth.ObjectStorageDaemon.Interval) + + compareProbeMaps(t, probeMap, expected.Spec.HealthCheck.LivenessProbe) + compareProbeMaps(t, probeMap, expected.Spec.HealthCheck.StartupProbe) + +} + +// Helper function to compare two maps +func compareProbeMaps(t *testing.T, map1, map2 map[rookCephv1.KeyType]*rookCephv1.ProbeSpec) { + assert.Equal(t, len(map1), len(map2)) + + for key, value1 := range map1 { + value2, exists := map2[key] + assert.Assert(t, exists, "Key %v not found in map2", key) + + // Compare the actual ProbeSpec values + assert.Equal(t, value1.Disabled, value2.Disabled) + assert.Equal(t, value1.Probe.InitialDelaySeconds, value2.Probe.InitialDelaySeconds) + assert.Equal(t, value1.Probe.TimeoutSeconds, value2.Probe.TimeoutSeconds) + } +} + func TestDetermineDefaultCephDeviceClass(t *testing.T) { cases := []struct { label string diff --git a/deploy/csv-templates/crds/ocs/ocs.openshift.io_storageclusters.yaml b/deploy/csv-templates/crds/ocs/ocs.openshift.io_storageclusters.yaml index 85d94e7a06..b0f35f513c 100644 --- a/deploy/csv-templates/crds/ocs/ocs.openshift.io_storageclusters.yaml +++ b/deploy/csv-templates/crds/ocs/ocs.openshift.io_storageclusters.yaml @@ -766,6 +766,402 @@ spec: minimum: 0 nullable: true type: number + healthCheck: + description: CephClusterHealthCheckSpec represent the healthcheck + for Ceph daemons + properties: + daemonHealth: + description: DaemonHealth is the health check for a given + daemon + nullable: true + properties: + mon: + description: Monitor represents the health check settings + for the Ceph monitor + nullable: true + properties: + disabled: + type: boolean + interval: + description: Interval is the internal in second + or minute for the health check to run like 60s + for 60 seconds + type: string + timeout: + type: string + type: object + osd: + description: ObjectStorageDaemon represents the health + check settings for the Ceph OSDs + nullable: true + properties: + disabled: + type: boolean + interval: + description: Interval is the internal in second + or minute for the health check to run like 60s + for 60 seconds + type: string + timeout: + type: string + type: object + status: + description: Status represents the health check settings + for the Ceph health + nullable: true + properties: + disabled: + type: boolean + interval: + description: Interval is the internal in second + or minute for the health check to run like 60s + for 60 seconds + type: string + timeout: + type: string + type: object + type: object + livenessProbe: + additionalProperties: + description: ProbeSpec is a wrapper around Probe so + it can be enabled or disabled for a Ceph daemon + properties: + disabled: + description: Disabled determines whether probe is + disable or not + type: boolean + probe: + description: |- + Probe describes a health check to be performed against a container to determine whether it is + alive or ready to receive traffic. + properties: + exec: + description: Exec specifies a command to execute + in the container. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies a GRPC HealthCheckRequest. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies an HTTP GET request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the + request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies a connection + to a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + type: object + description: LivenessProbe allows changing the livenessProbe + configuration for a given daemon + type: object + startupProbe: + additionalProperties: + description: ProbeSpec is a wrapper around Probe so + it can be enabled or disabled for a Ceph daemon + properties: + disabled: + description: Disabled determines whether probe is + disable or not + type: boolean + probe: + description: |- + Probe describes a health check to be performed against a container to determine whether it is + alive or ready to receive traffic. + properties: + exec: + description: Exec specifies a command to execute + in the container. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies a GRPC HealthCheckRequest. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies an HTTP GET request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the + request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies a connection + to a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + type: object + description: StartupProbe allows changing the startupProbe + configuration for a given daemon + type: object + type: object mgrCount: enum: - 1 diff --git a/deploy/ocs-operator/manifests/storagecluster.crd.yaml b/deploy/ocs-operator/manifests/storagecluster.crd.yaml index 85d94e7a06..b0f35f513c 100644 --- a/deploy/ocs-operator/manifests/storagecluster.crd.yaml +++ b/deploy/ocs-operator/manifests/storagecluster.crd.yaml @@ -766,6 +766,402 @@ spec: minimum: 0 nullable: true type: number + healthCheck: + description: CephClusterHealthCheckSpec represent the healthcheck + for Ceph daemons + properties: + daemonHealth: + description: DaemonHealth is the health check for a given + daemon + nullable: true + properties: + mon: + description: Monitor represents the health check settings + for the Ceph monitor + nullable: true + properties: + disabled: + type: boolean + interval: + description: Interval is the internal in second + or minute for the health check to run like 60s + for 60 seconds + type: string + timeout: + type: string + type: object + osd: + description: ObjectStorageDaemon represents the health + check settings for the Ceph OSDs + nullable: true + properties: + disabled: + type: boolean + interval: + description: Interval is the internal in second + or minute for the health check to run like 60s + for 60 seconds + type: string + timeout: + type: string + type: object + status: + description: Status represents the health check settings + for the Ceph health + nullable: true + properties: + disabled: + type: boolean + interval: + description: Interval is the internal in second + or minute for the health check to run like 60s + for 60 seconds + type: string + timeout: + type: string + type: object + type: object + livenessProbe: + additionalProperties: + description: ProbeSpec is a wrapper around Probe so + it can be enabled or disabled for a Ceph daemon + properties: + disabled: + description: Disabled determines whether probe is + disable or not + type: boolean + probe: + description: |- + Probe describes a health check to be performed against a container to determine whether it is + alive or ready to receive traffic. + properties: + exec: + description: Exec specifies a command to execute + in the container. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies a GRPC HealthCheckRequest. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies an HTTP GET request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the + request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies a connection + to a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + type: object + description: LivenessProbe allows changing the livenessProbe + configuration for a given daemon + type: object + startupProbe: + additionalProperties: + description: ProbeSpec is a wrapper around Probe so + it can be enabled or disabled for a Ceph daemon + properties: + disabled: + description: Disabled determines whether probe is + disable or not + type: boolean + probe: + description: |- + Probe describes a health check to be performed against a container to determine whether it is + alive or ready to receive traffic. + properties: + exec: + description: Exec specifies a command to execute + in the container. + properties: + command: + description: |- + Command is the command line to execute inside the container, the working directory for the + command is root ('/') in the container's filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use + a shell, you need to explicitly call out to that shell. + Exit status of 0 is treated as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + description: |- + Minimum consecutive failures for the probe to be considered failed after having succeeded. + Defaults to 3. Minimum value is 1. + format: int32 + type: integer + grpc: + description: GRPC specifies a GRPC HealthCheckRequest. + properties: + port: + description: Port number of the gRPC service. + Number must be in the range 1 to 65535. + format: int32 + type: integer + service: + default: "" + description: |- + Service is the name of the service to place in the gRPC HealthCheckRequest + (see https://github.com/grpc/grpc/blob/master/doc/health-checking.md). + + If this is not specified, the default behavior is defined by gRPC. + type: string + required: + - port + type: object + httpGet: + description: HTTPGet specifies an HTTP GET request + to perform. + properties: + host: + description: |- + Host name to connect to, defaults to the pod IP. You probably want to set + "Host" in httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the + request. HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom + header to be used in HTTP probes + properties: + name: + description: |- + The header field name. + This will be canonicalized upon output, so case-variant names will be understood as the same header. + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + description: Path to access on the HTTP + server. + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Name or number of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: |- + Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: |- + Number of seconds after the container has started before liveness probes are initiated. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + periodSeconds: + description: |- + How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: |- + Minimum consecutive successes for the probe to be considered successful after having failed. + Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1. + format: int32 + type: integer + tcpSocket: + description: TCPSocket specifies a connection + to a TCP port. + properties: + host: + description: 'Optional: Host name to connect + to, defaults to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: |- + Number or name of the port to access on the container. + Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: |- + Optional duration in seconds the pod needs to terminate gracefully upon probe failure. + The grace period is the duration in seconds after the processes running in the pod are sent + a termination signal and the time when the processes are forcibly halted with a kill signal. + Set this value longer than the expected cleanup time for your process. + If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this + value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates stop immediately via + the kill signal (no opportunity to shut down). + This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: |- + Number of seconds after which the probe times out. + Defaults to 1 second. Minimum value is 1. + More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes + format: int32 + type: integer + type: object + type: object + description: StartupProbe allows changing the startupProbe + configuration for a given daemon + type: object + type: object mgrCount: enum: - 1 diff --git a/metrics/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/storagecluster_types.go b/metrics/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/storagecluster_types.go index 7f0a34545e..b6753b5941 100644 --- a/metrics/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/storagecluster_types.go +++ b/metrics/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/storagecluster_types.go @@ -216,6 +216,9 @@ type ManageCephCluster struct { // Whether to allow updating the device class after the OSD is initially provisioned AllowDeviceClassUpdate bool `json:"allowDeviceClassUpdate,omitempty"` + + // CephClusterHealthCheckSpec represent the healthcheck for Ceph daemons + HealthCheck *rookCephv1.CephClusterHealthCheckSpec `json:"healthCheck,omitempty"` } // ManageCephConfig defines how to reconcile the Ceph configuration diff --git a/metrics/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/zz_generated.deepcopy.go b/metrics/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/zz_generated.deepcopy.go index 25007bf27e..2fa69d6729 100644 --- a/metrics/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/zz_generated.deepcopy.go +++ b/metrics/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/zz_generated.deepcopy.go @@ -294,6 +294,11 @@ func (in *ManageCephCluster) DeepCopyInto(out *ManageCephCluster) { *out = new(float64) **out = **in } + if in.HealthCheck != nil { + in, out := &in.HealthCheck, &out.HealthCheck + *out = new(ceph_rook_iov1.CephClusterHealthCheckSpec) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ManageCephCluster. diff --git a/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/storagecluster_types.go b/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/storagecluster_types.go index 7f0a34545e..b6753b5941 100644 --- a/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/storagecluster_types.go +++ b/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/storagecluster_types.go @@ -216,6 +216,9 @@ type ManageCephCluster struct { // Whether to allow updating the device class after the OSD is initially provisioned AllowDeviceClassUpdate bool `json:"allowDeviceClassUpdate,omitempty"` + + // CephClusterHealthCheckSpec represent the healthcheck for Ceph daemons + HealthCheck *rookCephv1.CephClusterHealthCheckSpec `json:"healthCheck,omitempty"` } // ManageCephConfig defines how to reconcile the Ceph configuration diff --git a/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/zz_generated.deepcopy.go b/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/zz_generated.deepcopy.go index 25007bf27e..2fa69d6729 100644 --- a/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/zz_generated.deepcopy.go +++ b/vendor/github.com/red-hat-storage/ocs-operator/api/v4/v1/zz_generated.deepcopy.go @@ -294,6 +294,11 @@ func (in *ManageCephCluster) DeepCopyInto(out *ManageCephCluster) { *out = new(float64) **out = **in } + if in.HealthCheck != nil { + in, out := &in.HealthCheck, &out.HealthCheck + *out = new(ceph_rook_iov1.CephClusterHealthCheckSpec) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ManageCephCluster.