Skip to content

Commit

Permalink
Filter pods where the Akka container has 'waiting' status (#891)
Browse files Browse the repository at this point in the history
* Filter pods where the Akka container has 'waiting' status

We wait until pods become 'Running', but when using pods with multiple
containers, the pod becomes 'Running' as soon as the first primary
container starts.

https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/

In that case, when the Akka container fails to start, it also prevents
other pods (where the issue might be resolved) from starting. For that
reason, allow configuring which container is the Akka container, so we
can exclude its pod from discovery while it is 'waiting'.

Refs #890

* scalafmt

* mima exclusions
  • Loading branch information
raboof authored Apr 26, 2021
1 parent 329d794 commit 12a6933
Show file tree
Hide file tree
Showing 9 changed files with 361 additions and 24 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# private[kubernetes]
ProblemFilters.exclude[DirectMissingMethodProblem]("akka.discovery.kubernetes.KubernetesApiServiceDiscovery.targets")
ProblemFilters.exclude[Problem]("akka.discovery.kubernetes.PodList#*")
ProblemFilters.exclude[Problem]("akka.discovery.kubernetes.PodList$*")
3 changes: 3 additions & 0 deletions discovery-kubernetes-api/src/main/resources/reference.conf
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,8 @@ akka.discovery {

# Enables the usage of the raw IP instead of the composed value for the resolved target host
use-raw-ip = false

# When set, pods whose container with this name is in the 'waiting' state are excluded from discovery
container-name = ""
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ import spray.json._
implicit val containerPortFormat: JsonFormat[ContainerPort] = jsonFormat2(ContainerPort)
implicit val containerFormat: JsonFormat[Container] = jsonFormat2(Container)
implicit val podSpecFormat: JsonFormat[PodSpec] = jsonFormat1(PodSpec)
implicit val podStatusFormat: JsonFormat[PodStatus] = jsonFormat2(PodStatus)
implicit val containerStatusFormat: JsonFormat[ContainerStatus] = jsonFormat2(ContainerStatus)
implicit val podStatusFormat: JsonFormat[PodStatus] = jsonFormat3(PodStatus)
implicit val metadataFormat: JsonFormat[Metadata] = jsonFormat1(Metadata)
implicit val podFormat: JsonFormat[Pod] = jsonFormat3(Pod)
implicit val podListFormat: RootJsonFormat[PodList] = jsonFormat1(PodList.apply)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,16 @@ object KubernetesApiServiceDiscovery {
portName: Option[String],
podNamespace: String,
podDomain: String,
rawIp: Boolean): Seq[ResolvedTarget] =
rawIp: Boolean,
containerName: Option[String]): Seq[ResolvedTarget] =
for {
item <- podList.items
if item.metadata.flatMap(_.deletionTimestamp).isEmpty
itemSpec <- item.spec.toSeq
itemStatus <- item.status.toSeq
if itemStatus.phase.contains("Running")
if containerName.forall(name =>
itemStatus.containerStatuses.filter(_.name == name).exists(!_.state.contains("waiting")))
ip <- itemStatus.podIP.toSeq
// Maybe port is an Option of a port, and will be None if no portName was requested
maybePort <- portName match {
Expand Down Expand Up @@ -156,7 +159,8 @@ class KubernetesApiServiceDiscovery(implicit system: ActorSystem) extends Servic
}

} yield {
val addresses = targets(podList, query.portName, podNamespace, settings.podDomain, settings.rawIp)
val addresses =
targets(podList, query.portName, podNamespace, settings.podDomain, settings.rawIp, settings.containerName)
if (addresses.isEmpty && podList.items.nonEmpty) {
if (log.isInfoEnabled) {
val containerPortNames = podList.items.flatMap(_.spec).flatMap(_.containers).flatMap(_.ports).flatten.toSet
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import akka.annotation.InternalApi
/**
* INTERNAL API
*/
@InternalApi private[akka] object PodList {
@InternalApi private[kubernetes] object PodList {
final case class Metadata(deletionTimestamp: Option[String])

final case class ContainerPort(name: Option[String], containerPort: Int)
Expand All @@ -19,9 +19,17 @@ import akka.annotation.InternalApi

final case class PodSpec(containers: immutable.Seq[Container])

final case class PodStatus(podIP: Option[String], phase: Option[String])
final case class ContainerStatus(name: String, state: Map[String, Unit])

final case class PodStatus(
podIP: Option[String],
containerStatuses: immutable.Seq[ContainerStatus],
phase: Option[String])

final case class Pod(spec: Option[PodSpec], status: Option[PodStatus], metadata: Option[Metadata])
}

final case class PodList(items: immutable.Seq[PodList.Pod])
/**
* INTERNAL API
*/
@InternalApi private[kubernetes] final case class PodList(items: immutable.Seq[PodList.Pod])
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ final class Settings(system: ExtendedActorSystem) extends Extension {

// Read from 'use-raw-ip': use the raw IP instead of the composed value for the resolved target host.
lazy val rawIp: Boolean = kubernetesApi.getBoolean("use-raw-ip")

// Read from 'container-name': the container whose 'waiting' state is checked; None when the setting is empty.
val containerName: Option[String] = {
  val configuredName = kubernetesApi.getString("container-name")
  if (configuredName.nonEmpty) Some(configuredName) else None
}

// Renders the API-related and pod-related settings as "Settings(<values separated by ", ">)".
override def toString = {
  val apiPart = s"$apiCaPath, $apiTokenPath, $apiServiceHostEnvName, $apiServicePortEnvName"
  val podPart = s"$podNamespacePath, $podNamespace, $podDomain"
  s"Settings($apiPart, $podPart)"
}
Expand Down
266 changes: 266 additions & 0 deletions discovery-kubernetes-api/src/test/resources/multi-container-pod.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,266 @@
{
"kind": "PodList",
"apiVersion": "v1",
"metadata": {
"selfLink": "/api/v1/namespaces/default/pods",
"resourceVersion": "16042"
},
"items": [
{
"metadata": {
"annotations": {
"cloudstate.io/enabled": "true",
"cloudstate.io/stateful-service-config": "ss-cfg-mesh",
"cloudstate.io/stateful-store": "default-spanner-store",
"cluster-autoscaler.kubernetes.io/safe-to-evict": "true"
},
"creationTimestamp": "2021-04-23T08:45:58Z",
"generateName": "mesh-b4b946946-",
"labels": {
"app.kubernetes.io/component": "user-function",
"app.kubernetes.io/managed-by": "cloudstate-operator",
"app.kubernetes.io/name": "mesh",
"cloudstate.io/stateful-service": "mesh",
"pod-template-hash": "b4b946946"
},
"name": "mesh-b4b946946-g8vfj",
"namespace": "b58dbc88-3651-4fb4-8408-60c375592d1d",
"ownerReferences": [
{
"apiVersion": "apps/v1",
"blockOwnerDeletion": true,
"controller": true,
"kind": "ReplicaSet",
"name": "mesh-b4b946946",
"uid": "3bb114e8-8a07-449f-818b-d0fa959a8681"
}
],
"resourceVersion": "68638988",
"selfLink": "/api/v1/namespaces/b58dbc88-3651-4fb4-8408-60c375592d1d/pods/mesh-b4b946946-g8vfj",
"uid": "2ecb33a7-bac6-4a0f-8717-c3ffde85ddd9"
},
"spec": {
"containers": [
{
"env": [
{
"name": "PORT",
"value": "8080"
}
],
"image": "docker.io/raboof/akkaserverless-wirelessmesh-java:349b1b7",
"imagePullPolicy": "IfNotPresent",
"name": "user-function",
"ports": [
{
"containerPort": 8080,
"name": "user-port",
"protocol": "TCP"
}
],
"resources": {
"limits": {
"memory": "512Mi"
},
"requests": {
"cpu": "400m",
"memory": "512Mi"
}
},
"terminationMessagePath": "/dev/termination-log",
"terminationMessagePolicy": "File"
},
{
"imagePullPolicy": "IfNotPresent",
"livenessProbe": {
"failureThreshold": 5,
"httpGet": {
"path": "/alive",
"port": 8558,
"scheme": "HTTP"
},
"periodSeconds": 10,
"successThreshold": 1,
"timeoutSeconds": 1
},
"name": "cloudstate-sidecar",
"ports": [
{
"containerPort": 8013,
"name": "grpc-http-proxy",
"protocol": "TCP"
},
{
"containerPort": 9090,
"name": "cs-metrics",
"protocol": "TCP"
}
],
"readinessProbe": {
"failureThreshold": 5,
"httpGet": {
"path": "/ready",
"port": 8558,
"scheme": "HTTP"
},
"periodSeconds": 10,
"successThreshold": 1,
"timeoutSeconds": 1
},
"resources": {
"limits": {
"memory": "512Mi"
},
"requests": {
"cpu": "400m",
"memory": "512Mi"
}
},
"terminationMessagePath": "/dev/termination-log",
"terminationMessagePolicy": "File"
},
{
"args": [
"proxy",
"sidecar"
],
"name": "ppp-proxy",
"readinessProbe": {
"failureThreshold": 30,
"httpGet": {
"path": "/healthz/ready",
"port": 15020,
"scheme": "HTTP"
},
"initialDelaySeconds": 1,
"periodSeconds": 2,
"successThreshold": 1,
"timeoutSeconds": 1
},
"resources": {
"limits": {
"cpu": "2",
"memory": "1Gi"
},
"requests": {
"cpu": "100m",
"memory": "128Mi"
}
},
"securityContext": {
"allowPrivilegeEscalation": false,
"capabilities": {
"drop": [
"ALL"
]
},
"privileged": false,
"readOnlyRootFilesystem": true,
"runAsNonRoot": true
},
"terminationMessagePath": "/dev/termination-log",
"terminationMessagePolicy": "File"
}
],
"dnsPolicy": "ClusterFirst",
"enableServiceLinks": true,
"nodeName": "gke-dev-us-east-executio-default-pool-4678c5fa-mhx9",
"priority": 0,
"restartPolicy": "Always",
"schedulerName": "default-scheduler",
"securityContext": {},
"serviceAccount": "cloudstate-pod-service-account",
"serviceAccountName": "cloudstate-pod-service-account",
"terminationGracePeriodSeconds": 30
},
"status": {
"conditions": [
{
"lastProbeTime": null,
"lastTransitionTime": "2021-04-23T08:46:00Z",
"status": "True",
"type": "Initialized"
},
{
"lastProbeTime": null,
"lastTransitionTime": "2021-04-23T08:45:58Z",
"message": "containers with unready status: [cloudstate-sidecar]",
"reason": "ContainersNotReady",
"status": "False",
"type": "Ready"
},
{
"lastProbeTime": null,
"lastTransitionTime": "2021-04-23T08:45:58Z",
"message": "containers with unready status: [cloudstate-sidecar]",
"reason": "ContainersNotReady",
"status": "False",
"type": "ContainersReady"
},
{
"lastProbeTime": null,
"lastTransitionTime": "2021-04-23T08:45:58Z",
"status": "True",
"type": "PodScheduled"
}
],
"containerStatuses": [
{
"lastState": {
"terminated": {
"exitCode": 0,
"finishedAt": "2021-04-23T12:41:11Z",
"reason": "Completed",
"startedAt": "2021-04-23T12:41:04Z"
}
},
"name": "cloudstate-sidecar",
"ready": false,
"restartCount": 50,
"started": false,
"state": {
"waiting": {
"message": "back-off 5m0s restarting failed container=cloudstate-sidecar pod=mesh-b4b946946-g8vfj_b58dbc88-3651-4fb4-8408-60c375592d1d(2ecb33a7-bac6-4a0f-8717-c3ffde85ddd9)",
"reason": "CrashLoopBackOff"
}
}
},
{
"lastState": {},
"name": "ppp-proxy",
"ready": true,
"restartCount": 0,
"started": true,
"state": {
"running": {
"startedAt": "2021-04-23T08:46:01Z"
}
}
},
{
"lastState": {},
"name": "user-function",
"ready": true,
"restartCount": 0,
"started": true,
"state": {
"running": {
"startedAt": "2021-04-23T08:46:01Z"
}
}
}
],
"hostIP": "10.14.0.85",
"phase": "Running",
"podIP": "10.8.7.9",
"podIPs": [
{
"ip": "10.8.7.9"
}
],
"qosClass": "Burstable",
"startTime": "2021-04-23T08:45:58Z"
}
}
]
}
Loading

0 comments on commit 12a6933

Please sign in to comment.