Skip to content

Commit

Permalink
add readiness checks (#56)
Browse files Browse the repository at this point in the history
  • Loading branch information
mrIncompetent authored Feb 2, 2018
1 parent ebe4b8f commit bb708b9
Show file tree
Hide file tree
Showing 8 changed files with 201 additions and 56 deletions.
61 changes: 52 additions & 9 deletions Gopkg.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Gopkg.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,7 @@ required = ["k8s.io/code-generator/cmd/client-gen"]
[[constraint]]
name = "github.com/pmezard/go-difflib"
version = "1.0.0"

[[constraint]]
branch = "master"
name = "github.com/heptiolabs/healthcheck"
23 changes: 18 additions & 5 deletions cmd/controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,17 @@ import (
"flag"
"fmt"
"net"
"net/http"
"reflect"
"strings"
"time"

"github.com/golang/glog"
"github.com/heptiolabs/healthcheck"
machineclientset "github.com/kubermatic/machine-controller/pkg/client/clientset/versioned"
machineinformers "github.com/kubermatic/machine-controller/pkg/client/informers/externalversions"
"github.com/kubermatic/machine-controller/pkg/controller"
machinehealth "github.com/kubermatic/machine-controller/pkg/health"
"github.com/kubermatic/machine-controller/pkg/machines"
"github.com/kubermatic/machine-controller/pkg/signals"
"github.com/kubermatic/machine-controller/pkg/ssh"
Expand All @@ -38,18 +41,20 @@ import (
)

var (
masterURL string
kubeconfig string
sshKeyName string
clusterDNSIPs string
workerCount int
masterURL string
kubeconfig string
sshKeyName string
clusterDNSIPs string
healthListenAddress string
workerCount int
)

func main() {
flag.StringVar(&kubeconfig, "kubeconfig", "", "Path to a kubeconfig. Only required if out-of-cluster.")
flag.StringVar(&masterURL, "master", "", "The address of the Kubernetes API server. Overrides any value in kubeconfig. Only required if out-of-cluster.")
flag.StringVar(&sshKeyName, "ssh-key-name", "machine-controller", "The name of the private key. This name will be used when a public key will be created at the cloud provider.")
flag.StringVar(&clusterDNSIPs, "cluster-dns", "10.10.10.10", "Comma-separated list of DNS server IP address.")
flag.StringVar(&healthListenAddress, "health-listen-address", "127.0.0.1:8086", "Listen address for the readiness/liveness http server. The endpoints are /live /ready")
flag.IntVar(&workerCount, "worker-count", 5, "Number of workers to process machines. Using a high number with a lot of machines might cause getting rate-limited from your cloud provider.")

flag.Parse()
Expand Down Expand Up @@ -104,6 +109,14 @@ func main() {
}
}

health := healthcheck.NewHandler()
health.AddReadinessCheck("apiserver-connection", machinehealth.ApiserverReachable(kubeClient))
health.AddReadinessCheck("custom-resource-definitions-exist", machinehealth.CustomResourceDefinitionsEstablished(extclient))
for name, c := range c.ReadinessChecks() {
health.AddReadinessCheck(name, c)
}
go http.ListenAndServe(healthListenAddress, health)

if err = c.Run(workerCount, stopCh); err != nil {
glog.Fatalf("Error running controller: %v", err)
}
Expand Down
12 changes: 12 additions & 0 deletions examples/machine-controller.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,18 @@ spec:
- -logtostderr
- -v=8
- -cluster-dns=10.10.10.10
- -health-listen-address=0.0.0.0:8086
livenessProbe:
httpGet:
path: /live
port: 8086
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: /ready
port: 8086
periodSeconds: 5
---
apiVersion: v1
kind: ServiceAccount
Expand Down
3 changes: 1 addition & 2 deletions pkg/controller/kubeconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package controller

import (
"fmt"

"time"

"k8s.io/api/core/v1"
Expand All @@ -17,7 +16,7 @@ const (
)

func (c *Controller) getClusterInfoKubeconfig() (*clientcmdapi.Config, error) {
cm, err := c.kubeClient.CoreV1().ConfigMaps(metav1.NamespacePublic).Get("cluster-info", metav1.GetOptions{})
cm, err := c.configMapLister.ConfigMaps(metav1.NamespacePublic).Get("cluster-info")
if err != nil {
return nil, err
}
Expand Down
50 changes: 38 additions & 12 deletions pkg/controller/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@ limitations under the License.
package controller

import (
"errors"
"fmt"
"net"
"regexp"
"time"

"github.com/golang/glog"
"github.com/heptiolabs/healthcheck"
machineclientset "github.com/kubermatic/machine-controller/pkg/client/clientset/versioned"
"github.com/kubermatic/machine-controller/pkg/client/informers/externalversions"
machinelistersv1alpha1 "github.com/kubermatic/machine-controller/pkg/client/listers/machines/v1alpha1"
Expand Down Expand Up @@ -64,10 +66,9 @@ type Controller struct {
kubeClient kubernetes.Interface
machineClient machineclientset.Interface

nodesLister listerscorev1.NodeLister
nodesSynced cache.InformerSynced
machinesLister machinelistersv1alpha1.MachineLister
machinesSynced cache.InformerSynced
nodesLister listerscorev1.NodeLister
configMapLister listerscorev1.ConfigMapLister
machinesLister machinelistersv1alpha1.MachineLister

workqueue workqueue.RateLimitingInterface

Expand All @@ -85,16 +86,16 @@ func NewMachineController(
clusterDNSIPs []net.IP) *Controller {

nodeInformer := kubeInformerFactory.Core().V1().Nodes()
configMapInformer := kubeInformerFactory.Core().V1().ConfigMaps()
machineInformer := machineInformerFactory.Machine().V1alpha1().Machines()

controller := &Controller{
kubeClient: kubeClient,
nodesLister: nodeInformer.Lister(),
nodesSynced: nodeInformer.Informer().HasSynced,
kubeClient: kubeClient,
nodesLister: nodeInformer.Lister(),
configMapLister: configMapInformer.Lister(),

machineClient: machineClient,
machinesLister: machineInformer.Lister(),
machinesSynced: machineInformer.Informer().HasSynced,

workqueue: workqueue.NewNamedRateLimitingQueue(workqueue.NewItemFastSlowRateLimiter(2*time.Second, 10*time.Second, 5), "Machines"),

Expand Down Expand Up @@ -130,10 +131,6 @@ func (c *Controller) Run(threadiness int, stopCh <-chan struct{}) error {
defer runtime.HandleCrash()
defer c.workqueue.ShutDown()

if ok := cache.WaitForCacheSync(stopCh, c.nodesSynced, c.machinesSynced); !ok {
return fmt.Errorf("failed to wait for caches to sync")
}

for i := 0; i < threadiness; i++ {
go wait.Until(c.runWorker, time.Second, stopCh)
}
Expand Down Expand Up @@ -573,3 +570,32 @@ func (c *Controller) handleObject(obj interface{}) {
return
}
}

func (c *Controller) ReadinessChecks() map[string]healthcheck.Check {
return map[string]healthcheck.Check{
"valid-info-kubeconfig": func() error {
cm, err := c.getClusterInfoKubeconfig()
if err != nil {
return err
}
if len(cm.Clusters) != 1 {
err := errors.New("invalid kubeconfig: no clusters found")
glog.V(2).Info(err)
return err
}
for name, c := range cm.Clusters {
if len(c.CertificateAuthorityData) == 0 {
err := fmt.Errorf("invalid kubeconfig: no certificate authority data was specified for kuberconfig.clusters.['%s']", name)
glog.V(2).Info(err)
return err
}
if len(c.Server) == 0 {
err := fmt.Errorf("invalid kubeconfig: no server was specified for kuberconfig.clusters.['%s']", name)
glog.V(2).Info(err)
return err
}
}
return nil
},
}
}
31 changes: 31 additions & 0 deletions pkg/health/readiness.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package health

import (
"errors"

"github.com/heptiolabs/healthcheck"
"github.com/kubermatic/machine-controller/pkg/machines"
apiextensionsclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
)

func ApiserverReachable(client kubernetes.Interface) healthcheck.Check {
return func() error {
_, err := client.CoreV1().Namespaces().List(metav1.ListOptions{})
return err
}
}

func CustomResourceDefinitionsEstablished(clientset apiextensionsclient.Interface) healthcheck.Check {
return func() error {
exist, err := machines.AllCustomResourceDefinitionsExists(clientset)
if err != nil {
return err
}
if !exist {
return errors.New("custom resource definitions do not exist / are established")
}
return nil
}
}
Loading

0 comments on commit bb708b9

Please sign in to comment.