From e43e38e033a6079b72e4db2491f4992ed587282e Mon Sep 17 00:00:00 2001
From: Andi Skrgat
Date: Mon, 2 Dec 2024 14:13:47 +0100
Subject: [PATCH] Add load balancer service (#90)

---
 charts/memgraph-high-availability/README.md   |   3 +-
 .../memgraph-high-availability/aks/README.md  | 143 ++++++++++++++++++
 .../templates/NOTES.txt                       |  28 +++-
 .../templates/coordinators.yaml               |   2 -
 .../templates/data.yaml                       |   2 -
 .../templates/load-balancer.yaml              |  15 --
 .../templates/services-coordinators.yaml      |  29 +++-
 .../templates/services-data.yaml              |  11 +-
 charts/memgraph-high-availability/values.yaml |   7 +-
 9 files changed, 207 insertions(+), 33 deletions(-)
 create mode 100644 charts/memgraph-high-availability/aks/README.md

diff --git a/charts/memgraph-high-availability/README.md b/charts/memgraph-high-availability/README.md
index b02539f..c99d203 100644
--- a/charts/memgraph-high-availability/README.md
+++ b/charts/memgraph-high-availability/README.md
@@ -46,7 +46,8 @@ The following table lists the configurable parameters of the Memgraph chart and
 | `memgraph.coordinators.volumeClaim.storagePVCSize` | Size of the storage PVC for coordinators | `1Gi` |
 | `memgraph.coordinators.volumeClaim.logPVC` | Enable log PVC for coordinators | `false` |
 | `memgraph.coordinators.volumeClaim.logPVCSize` | Size of the log PVC for coordinators | `256Mi` |
-| `memgraph.externalAccess.serviceType` | NodePort or LoadBalancer. Use LoadBalancer for Cloud production deployment and NodePort for local testing | `LoadBalancer` |
+| `memgraph.externalAccessConfig.coordinator.serviceType` | NodePort, CommonLoadBalancer or LoadBalancer. Use LoadBalancer for cloud production deployments and NodePort for local testing. 'CommonLoadBalancer' opens one load balancer shared by all coordinators, while 'LoadBalancer' opens one load balancer per coordinator. | `NodePort` |
+| `memgraph.externalAccessConfig.dataInstance.serviceType` | NodePort or LoadBalancer. Use LoadBalancer for cloud production deployments and NodePort for local testing. | `NodePort` |
 | `memgraph.ports.boltPort` | Bolt port used on coordinator and data instances. | `7687` |
 | `memgraph.ports.managementPort` | Management port used on coordinator and data instances. | `10000` |
 | `memgraph.ports.replicationPort` | Replication port used on data instances. | `20000` |
diff --git a/charts/memgraph-high-availability/aks/README.md b/charts/memgraph-high-availability/aks/README.md
new file mode 100644
index 0000000..82ae118
--- /dev/null
+++ b/charts/memgraph-high-availability/aks/README.md
@@ -0,0 +1,143 @@
## Description

This guide shows how to deploy Memgraph HA to Azure AKS. It is only a starting point; there are many ways to extend the setup described here. In this setup, each Memgraph instance is deployed to a separate `Standard_A2_v2` node.

## Installation

You will need:
- [azure-cli](https://learn.microsoft.com/en-us/cli/azure/)
- [kubectl](https://kubernetes.io/docs/tasks/tools/)
- [helm](https://helm.sh/docs/intro/install/)

We used `azure-cli 2.67.0`, `kubectl v1.30.0` and `helm 3.14.4`.

## Login with Azure-CLI

Use `az login` and enter your authentication details.

## Create resource group

Next, create a resource group which will later be attached to the Kubernetes cluster. Example:
```
az group create --name ResourceGroup2 --location northeurope
```

## Provision Kubernetes nodes

After creating the resource group, Kubernetes nodes can be created and attached to it. There are many other options available, but here we cover the simplest deployment scenario, in which we use five `Standard_A2_v2` instances, each hosting its own Memgraph instance.

```
az aks create --resource-group ResourceGroup2 --name memgraph-ha --node-count 5 --node-vm-size Standard_A2_v2 --generate-ssh-keys
```
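
Before moving on, you can optionally confirm that provisioning finished; one way (a sketch, assuming the resource group and cluster names used above) is to query the provisioning state:

```
az aks show --resource-group ResourceGroup2 --name memgraph-ha --query provisioningState -o tsv
```

This should print `Succeeded` once the node pool is ready.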
## Configure kubectl

To get the remote context from Azure AKS into your local kubectl, use:
```
az aks get-credentials --resource-group ResourceGroup2 --name memgraph-ha
```

## Label nodes

By running `kubectl get nodes -o=wide`, you should be able to see your nodes. Example:

| NAME                              | STATUS | ROLES  | AGE | VERSION | INTERNAL-IP | EXTERNAL-IP | OS-IMAGE           | KERNEL-VERSION    | CONTAINER-RUNTIME     |
|-----------------------------------|--------|--------|-----|---------|-------------|-------------|--------------------|-------------------|-----------------------|
| aks-nodepool1-65392319-vmss000000 | Ready  | <none> | 11m | v1.29.9 | 10.224.0.4  | <none>      | Ubuntu 22.04.5 LTS | 5.15.0-1074-azure | containerd://1.7.23-1 |
| aks-nodepool1-65392319-vmss000001 | Ready  | <none> | 12m | v1.29.9 | 10.224.0.8  | <none>      | Ubuntu 22.04.5 LTS | 5.15.0-1074-azure | containerd://1.7.23-1 |
| aks-nodepool1-65392319-vmss000002 | Ready  | <none> | 12m | v1.29.9 | 10.224.0.6  | <none>      | Ubuntu 22.04.5 LTS | 5.15.0-1074-azure | containerd://1.7.23-1 |
| aks-nodepool1-65392319-vmss000003 | Ready  | <none> | 11m | v1.29.9 | 10.224.0.5  | <none>      | Ubuntu 22.04.5 LTS | 5.15.0-1074-azure | containerd://1.7.23-1 |
| aks-nodepool1-65392319-vmss000004 | Ready  | <none> | 11m | v1.29.9 | 10.224.0.7  | <none>      | Ubuntu 22.04.5 LTS | 5.15.0-1074-azure | containerd://1.7.23-1 |

Most often, users will use smaller nodes for the three coordinators and bigger nodes for the data instances. To be able to do that, we label the first three nodes with `role=coordinator-node` and the last two with `role=data-node`.

```
kubectl label nodes aks-nodepool1-65392319-vmss000000 role=coordinator-node
kubectl label nodes aks-nodepool1-65392319-vmss000001 role=coordinator-node
kubectl label nodes aks-nodepool1-65392319-vmss000002 role=coordinator-node
kubectl label nodes aks-nodepool1-65392319-vmss000003 role=data-node
kubectl label nodes aks-nodepool1-65392319-vmss000004 role=data-node
```

## Deploy Memgraph HA

After preparing the nodes, we can deploy the Memgraph HA cluster with the `helm install` command. We set the affinity options so that the node labels are used, and expose each coordinator and data instance through its own LoadBalancer.

```
helm install mem-ha-test ./charts/memgraph-high-availability --set \
memgraph.env.MEMGRAPH_ENTERPRISE_LICENSE=<YOUR_LICENSE>,\
memgraph.env.MEMGRAPH_ORGANIZATION_NAME=Memgraph,memgraph.affinity.nodeSelection=true,\
memgraph.externalAccessConfig.dataInstance.serviceType=LoadBalancer,memgraph.externalAccessConfig.coordinator.serviceType=LoadBalancer
```
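
If you prefer not to pass everything through `--set`, the same configuration can be kept in a small override file; a sketch, where `ha-values.yaml` is an arbitrary file name and the value paths mirror the chart's `values.yaml`:

```
cat > ha-values.yaml <<'EOF'
memgraph:
  env:
    MEMGRAPH_ENTERPRISE_LICENSE: "<YOUR_LICENSE>"
    MEMGRAPH_ORGANIZATION_NAME: "Memgraph"
  affinity:
    nodeSelection: true
  externalAccessConfig:
    coordinator:
      serviceType: LoadBalancer
    dataInstance:
      serviceType: LoadBalancer
EOF
helm install mem-ha-test ./charts/memgraph-high-availability -f ha-values.yaml
```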
By running `kubectl get svc -o=wide` and `kubectl get pods -o=wide`, we can verify that the deployment finished successfully. Example:

| NAME                            | TYPE         | CLUSTER-IP   | EXTERNAL-IP    | PORT(S)                      | AGE | SELECTOR                   |
|---------------------------------|--------------|--------------|----------------|------------------------------|-----|----------------------------|
| kubernetes                      | ClusterIP    | 10.0.0.1     | `<none>`       | 443/TCP                      | 21m | `<none>`                   |
| memgraph-coordinator-1          | ClusterIP    | 10.0.65.178  | `<none>`       | 7687/TCP,12000/TCP,10000/TCP | 63s | app=memgraph-coordinator-1 |
| memgraph-coordinator-1-external | LoadBalancer | 10.0.28.222  | 172.205.93.228 | 7687:30402/TCP               | 63s | app=memgraph-coordinator-1 |
| memgraph-coordinator-2          | ClusterIP    | 10.0.129.252 | `<none>`       | 7687/TCP,12000/TCP,10000/TCP | 63s | app=memgraph-coordinator-2 |
| memgraph-coordinator-2-external | LoadBalancer | 10.0.102.4   | 4.209.216.240  | 7687:32569/TCP               | 63s | app=memgraph-coordinator-2 |
| memgraph-coordinator-3          | ClusterIP    | 10.0.42.32   | `<none>`       | 7687/TCP,12000/TCP,10000/TCP | 63s | app=memgraph-coordinator-3 |
| memgraph-coordinator-3-external | LoadBalancer | 10.0.208.244 | 68.219.15.104  | 7687:30874/TCP               | 63s | app=memgraph-coordinator-3 |
| memgraph-data-0                 | ClusterIP    | 10.0.227.204 | `<none>`       | 7687/TCP,10000/TCP,20000/TCP | 63s | app=memgraph-data-0        |
| memgraph-data-0-external        | LoadBalancer | 10.0.78.197  | 68.219.11.242  | 7687:31823/TCP               | 63s | app=memgraph-data-0        |
| memgraph-data-1                 | ClusterIP    | 10.0.251.227 | `<none>`       | 7687/TCP,10000/TCP,20000/TCP | 63s | app=memgraph-data-1        |
| memgraph-data-1-external        | LoadBalancer | 10.0.147.131 | 68.219.13.145  | 7687:30733/TCP               | 63s | app=memgraph-data-1        |


| NAME                     | READY | STATUS  | RESTARTS | AGE | IP         | NODE                              | NOMINATED NODE | READINESS GATES |
|--------------------------|-------|---------|----------|-----|------------|-----------------------------------|----------------|-----------------|
| memgraph-coordinator-1-0 | 1/1   | Running | 0        | 80s | 10.244.0.3 | aks-nodepool1-65392319-vmss000001 | `<none>`       | `<none>`        |
| memgraph-coordinator-2-0 | 1/1   | Running | 0        | 80s | 10.244.3.3 | aks-nodepool1-65392319-vmss000000 | `<none>`       | `<none>`        |
| memgraph-coordinator-3-0 | 1/1   | Running | 0        | 80s | 10.244.1.8 | aks-nodepool1-65392319-vmss000002 | `<none>`       | `<none>`        |
| memgraph-data-0-0        | 1/1   | Running | 0        | 80s | 10.244.4.3 | aks-nodepool1-65392319-vmss000004 | `<none>`       | `<none>`        |
| memgraph-data-1-0        | 1/1   | Running | 0        | 80s | 10.244.2.2 | aks-nodepool1-65392319-vmss000003 | `<none>`       | `<none>`        |
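
The external IPs from the service table can also be fetched programmatically, which is handy if you script the cluster setup; a sketch, assuming the service names created by this chart:

```
kubectl get svc memgraph-coordinator-1-external -o jsonpath='{.status.loadBalancer.ingress[0].ip}'
```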
+ +``` +ADD COORDINATOR 2 WITH CONFIG {"bolt_server": "4.209.216.240:7687", "management_server": "memgraph-coordinator-2.default.svc.cluster.local:10000", "coordinator_server": "memgraph-coordinator-2.default.svc.cluster.local:12000"}; +ADD COORDINATOR 3 WITH CONFIG {"bolt_server": "68.219.15.104:7687", "management_server": "memgraph-coordinator-3.default.svc.cluster.local:10000", "coordinator_server": "memgraph-coordinator-3.default.svc.cluster.local:12000"}; +REGISTER INSTANCE instance_1 WITH CONFIG {"bolt_server": "68.219.11.242:7687", "management_server": "memgraph-data-0.default.svc.cluster.local:10000", "replication_server": "memgraph-data-0.default.svc.cluster.local:20000"}; +REGISTER INSTANCE instance_2 WITH CONFIG {"bolt_server": "68.219.13.145:7687", "management_server": "memgraph-data-1.default.svc.cluster.local:10000", "replication_server": "memgraph-data-1.default.svc.cluster.local:20000"}; +SET INSTANCE instance_1 TO MAIN; +``` + +The output of `SHOW INSTANCES` should then look similar to: + +``` +| name | bolt_server | coordinator_server | management_server | health | role | last_succ_resp_ms | +|---------------|---------------------------------------------------------|---------------------------------------------------------|---------------------------------------------------------|--------|----------|-------------------| +| "coordinator_1" | "memgraph-coordinator-1.default.svc.cluster.local:7687" | "memgraph-coordinator-1.default.svc.cluster.local:12000" | "memgraph-coordinator-1.default.svc.cluster.local:10000" | "up" | "leader" | 0 | +| "coordinator_2" | "4.209.216.240:7687" | "memgraph-coordinator-2.default.svc.cluster.local:12000" | "memgraph-coordinator-2.default.svc.cluster.local:10000" | "up" | "follower"| 550 | +| "coordinator_3" | "68.219.15.104:7687" | "memgraph-coordinator-3.default.svc.cluster.local:12000" | "memgraph-coordinator-3.default.svc.cluster.local:10000" | "up" | "follower"| 26 | +| "instance_1" | "68.219.11.242:7687" | "" | "memgraph-data-0.default.svc.cluster.local:10000" | "up" | "main" | 917 | +| "instance_2" | "68.219.13.145:7687" | "" | "memgraph-data-1.default.svc.cluster.local:10000" | "up" | "replica" | 266 | +``` + +## Using CommonLoadBalancer + +When using 'CommonLoadBalancer', all three coordinators will be behind a single LoadBalancer. To connect the cluster, open Lab and use Memgraph +instance type of connection. For the host enter external IP of `memgraph-coordinator-1-external` and port is 7687. Again, we only need to change +'bolt\_server' part to use LoadBalancers' external IP. When connecting to CommonLoadBalancer, K8 will automatically route you to one of coordinators. +To see on which coordinator did you end route, run `show instances`. If for example, the output of show instances says you are connected to +coordinator 2, we need to add coordinators 1 and 3. Registering data instances stays exactly the same. 
+ +``` +ADD COORDINATOR 1 WITH CONFIG {"bolt_server": ":7687", "management_server": "memgraph-coordinator-1.default.svc.cluster.local:10000", "coordinator_server": "memgraph-coordinator-1.default.svc.cluster.local:12000"}; +ADD COORDINATOR 3 WITH CONFIG {"bolt_server": "CommonLoadBalancer-IP:7687", "management_server": "memgraph-coordinator-3.default.svc.cluster.local:10000", "coordinator_server": "memgraph-coordinator-3.default.svc.cluster.local:12000"}; +REGISTER INSTANCE instance_1 WITH CONFIG {"bolt_server": "68.219.11.242:7687", "management_server": "memgraph-data-0.default.svc.cluster.local:10000", "replication_server": "memgraph-data-0.default.svc.cluster.local:20000"}; +REGISTER INSTANCE instance_2 WITH CONFIG {"bolt_server": "68.219.13.145:7687", "management_server": "memgraph-data-1.default.svc.cluster.local:10000", "replication_server": "memgraph-data-1.default.svc.cluster.local:20000"}; +SET INSTANCE instance_1 TO MAIN; +``` diff --git a/charts/memgraph-high-availability/templates/NOTES.txt b/charts/memgraph-high-availability/templates/NOTES.txt index 035bdac..9e2eb72 100644 --- a/charts/memgraph-high-availability/templates/NOTES.txt +++ b/charts/memgraph-high-availability/templates/NOTES.txt @@ -8,13 +8,29 @@ The cluster setup requires the proper enterprise license to work since HA is an You can connect to Memgraph instances via Lab, mgconsole, or any other client. By default, all Memgraph instances (coordinators and data instances) listen on port 7687 for a bolt connection. Make sure your are connecting to the correct ip address and port. For details check the configuration on your cloud provider(aws, gcp, azure, etc.) -To start, you should add coordinators and register data instances in order to completely setup cluster. In both cases you only need to modify 'bolt_server' part and set it to the DNS -of the node on which instance is being started. Node ports are fixed. Example: +To start, you should add coordinators and register data instances in order to completely setup cluster. Depending on whether you use LoadBalancers or +NodePorts to expose your service, queries will be slightly different. In both cases you only need to modify 'bolt_server' part of the query while +'management_server', 'coordinator_server' and 'replication_server' will stay the same. If you are connecting via Lab, select 'Memgraph instance' +type of connection when adding instances to the cluster. 
+
+
+NodePort configuration example
+
+ADD COORDINATOR 2 WITH CONFIG {"bolt_server": "<NODE-IP>:<NODE-PORT>", "management_server": "memgraph-coordinator-2.default.svc.cluster.local:10000", "coordinator_server": "memgraph-coordinator-2.default.svc.cluster.local:12000"};
+REGISTER INSTANCE instance_1 WITH CONFIG {"bolt_server": "<NODE-IP>:<NODE-PORT>", "management_server": "memgraph-data-0.default.svc.cluster.local:10000", "replication_server": "memgraph-data-0.default.svc.cluster.local:20000"};
+
+
+LoadBalancer configuration example
+
+ADD COORDINATOR 2 WITH CONFIG {"bolt_server": "<LOAD-BALANCER-IP>:7687", "management_server": "memgraph-coordinator-2.default.svc.cluster.local:10000", "coordinator_server": "memgraph-coordinator-2.default.svc.cluster.local:12000"};
+REGISTER INSTANCE instance_1 WITH CONFIG {"bolt_server": "<LOAD-BALANCER-IP>:7687", "management_server": "memgraph-data-0.default.svc.cluster.local:10000", "replication_server": "memgraph-data-0.default.svc.cluster.local:20000"};
+
+
+CommonLoadBalancer configuration example
+
+ADD COORDINATOR 2 WITH CONFIG {"bolt_server": "<COMMON-LOAD-BALANCER-IP>:7687", "management_server": "memgraph-coordinator-2.default.svc.cluster.local:10000", "coordinator_server": "memgraph-coordinator-2.default.svc.cluster.local:12000"};
+REGISTER INSTANCE instance_1 WITH CONFIG {"bolt_server": "<COMMON-LOAD-BALANCER-IP>:7687", "management_server": "memgraph-data-0.default.svc.cluster.local:10000", "replication_server": "memgraph-data-0.default.svc.cluster.local:20000"};
 
-ADD COORDINATOR 2 WITH CONFIG {"bolt_server": "<NODE-DNS>:32002", "management_server": "memgraph-coordinator-2.default.svc.cluster.local:10000", "coordinator_server": "memgraph-coordinator-2.default.svc.cluster.local:12000"};
-ADD COORDINATOR 3 WITH CONFIG {"bolt_server": "<NODE-DNS>:32003", "management_server": "memgraph-coordinator-3.default.svc.cluster.local:10000", "coordinator_server": "memgraph-coordinator-3.default.svc.cluster.local:12000"};
-REGISTER INSTANCE instance_1 WITH CONFIG {"bolt_server": "<NODE-DNS>:32010", "management_server": "memgraph-data-0.default.svc.cluster.local:10000", "replication_server": "memgraph-data-0.default.svc.cluster.local:20000"};
-REGISTER INSTANCE instance_2 WITH CONFIG {"bolt_server": "<NODE-DNS>:32011", "management_server": "memgraph-data-1.default.svc.cluster.local:10000", "replication_server": "memgraph-data-1.default.svc.cluster.local:20000"};
 
 If you are connecting via Lab, specify your coordinator instance IP address and port in Memgraph Lab GUI and select Memgraph HA cluster type of connection.
diff --git a/charts/memgraph-high-availability/templates/coordinators.yaml b/charts/memgraph-high-availability/templates/coordinators.yaml
index b36325b..26d7151 100644
--- a/charts/memgraph-high-availability/templates/coordinators.yaml
+++ b/charts/memgraph-high-availability/templates/coordinators.yaml
@@ -15,8 +15,6 @@ spec:
       labels:
         app: memgraph-coordinator-{{ $coordinator.id }}
         role: coordinator
-        instance-type: coordinator
-
     spec:
       affinity:
 {{- if $.Values.memgraph.affinity.nodeSelection }}
diff --git a/charts/memgraph-high-availability/templates/data.yaml b/charts/memgraph-high-availability/templates/data.yaml
index e59859b..678e3db 100644
--- a/charts/memgraph-high-availability/templates/data.yaml
+++ b/charts/memgraph-high-availability/templates/data.yaml
@@ -15,8 +15,6 @@ spec:
       labels:
         app: memgraph-data-{{ $data.id }}
         role: data
-        instance-type: data
-
     spec:
       affinity:
 {{- if $.Values.memgraph.affinity.nodeSelection }}
diff --git a/charts/memgraph-high-availability/templates/load-balancer.yaml b/charts/memgraph-high-availability/templates/load-balancer.yaml
index c8c497f..e69de29 100644
--- a/charts/memgraph-high-availability/templates/load-balancer.yaml
+++ b/charts/memgraph-high-availability/templates/load-balancer.yaml
@@ -1,15 +0,0 @@
-{{- if $.Values.memgraph.coordLoadBalancer.enabled}}
-apiVersion: v1
-kind: Service
-metadata:
-  name: external-access
-spec:
-  type: LoadBalancer
-  selector:
-    instance-type: coordinator
-  ports:
-    - protocol: TCP
-      name: bolt
-      port: {{ $.Values.memgraph.ports.boltPort }}
-      targetPort: {{ $.Values.memgraph.ports.boltPort }}
-{{- end }}
diff --git a/charts/memgraph-high-availability/templates/services-coordinators.yaml b/charts/memgraph-high-availability/templates/services-coordinators.yaml
index 13ed7aa..d620103 100644
--- a/charts/memgraph-high-availability/templates/services-coordinators.yaml
+++ b/charts/memgraph-high-availability/templates/services-coordinators.yaml
@@ -1,4 +1,9 @@
-# Service for coordinator instances internal
+{{- $validCoordServices := list "CommonLoadBalancer" "LoadBalancer" "NodePort" }}
+{{- if not (has .Values.memgraph.externalAccessConfig.coordinator.serviceType $validCoordServices) }}
+{{- fail "Invalid value for memgraph.externalAccessConfig.coordinator.serviceType. Use 'CommonLoadBalancer', 'LoadBalancer' or 'NodePort'." }}
+{{- end }}
+
+
 {{- range .Values.coordinators }}
 ---
 apiVersion: v1
@@ -24,7 +29,21 @@ spec:
       targetPort: {{ $.Values.memgraph.ports.managementPort }}
 {{- end }}
 
-# Service for coordinators instances external
+{{ if eq $.Values.memgraph.externalAccessConfig.coordinator.serviceType "CommonLoadBalancer" }}
+apiVersion: v1
+kind: Service
+metadata:
+  name: coordinators
+spec:
+  type: LoadBalancer
+  selector:
+    role: coordinator
+  ports:
+    - protocol: TCP
+      name: bolt
+      port: {{ $.Values.memgraph.ports.boltPort }}
+      targetPort: {{ $.Values.memgraph.ports.boltPort }}
+{{ else }}
 {{- range .Values.coordinators }}
 ---
 apiVersion: v1
@@ -32,7 +51,11 @@ kind: Service
 metadata:
   name: memgraph-coordinator-{{ .id }}-external
 spec:
+{{ if eq $.Values.memgraph.externalAccessConfig.coordinator.serviceType "LoadBalancer" }}
+  type: LoadBalancer
+{{ else }}
   type: NodePort
+{{ end }}
   selector:
     app: memgraph-coordinator-{{ .id }}
   ports:
@@ -40,5 +63,5 @@ spec:
       name: bolt
       port: {{ $.Values.memgraph.ports.boltPort }}
       targetPort: {{ $.Values.memgraph.ports.boltPort }}
-      nodePort: {{ add 32000 .id }}
 {{- end }}
+{{ end }}
diff --git a/charts/memgraph-high-availability/templates/services-data.yaml b/charts/memgraph-high-availability/templates/services-data.yaml
index e9c3569..bb0bd7b 100644
--- a/charts/memgraph-high-availability/templates/services-data.yaml
+++ b/charts/memgraph-high-availability/templates/services-data.yaml
@@ -1,4 +1,8 @@
-# Service for data instances internal
+{{- $validDataServices := list "LoadBalancer" "NodePort" }}
+{{- if not (has .Values.memgraph.externalAccessConfig.dataInstance.serviceType $validDataServices) }}
+{{- fail "Invalid value for memgraph.externalAccessConfig.dataInstance.serviceType. Use 'LoadBalancer' or 'NodePort'." }}
+{{- end }}
+
 {{- range .Values.data }}
 ---
 apiVersion: v1
@@ -32,7 +36,11 @@ kind: Service
 metadata:
   name: memgraph-data-{{ .id }}-external
 spec:
+{{ if eq $.Values.memgraph.externalAccessConfig.dataInstance.serviceType "NodePort" }}
   type: NodePort
+{{ else }}
+  type: LoadBalancer
+{{ end }}
   selector:
     app: memgraph-data-{{ .id }}
   ports:
@@ -40,5 +48,4 @@ spec:
       name: bolt
       port: {{ $.Values.memgraph.ports.boltPort }}
       targetPort: {{ $.Values.memgraph.ports.boltPort }}
-      nodePort: {{ add 32010 .id }}
 {{- end }}
diff --git a/charts/memgraph-high-availability/values.yaml b/charts/memgraph-high-availability/values.yaml
index 4497d43..044c46f 100644
--- a/charts/memgraph-high-availability/values.yaml
+++ b/charts/memgraph-high-availability/values.yaml
@@ -37,8 +37,11 @@ memgraph:
     managementPort: 10000
     replicationPort: 20000
     coordinatorPort: 12000
-  coordLoadBalancer:
-    enabled: true
+  externalAccessConfig:
+    dataInstance:
+      serviceType: "NodePort"
+    coordinator:
+      serviceType: "NodePort"
 
   # Affinity controls the scheduling of the memgraph-high-availability pods. By default data pods will avoid being scheduled on the same node as other data pods, and coordinator pods will avoid being scheduled on the same node as other coordinator pods. Deployment won't fail if there are not enough nodes.
   affinity:
     # The unique affinity will schedule the pods on different nodes in the cluster. This means coordinators and data nodes will not be scheduled on the same node. If there are more pods than nodes, the deployment will fail.
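
To preview what these templates render for a given service type without installing anything, `helm template` can help; a sketch, reusing the chart path and release name from the AKS guide above:

```
helm template mem-ha-test ./charts/memgraph-high-availability \
  --set memgraph.externalAccessConfig.coordinator.serviceType=CommonLoadBalancer \
  | grep -B 4 'type: LoadBalancer'
```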