Skip to content

Commit

Permalink
Created Cilium Network Policy and Cilium Cluster Wide Network Policy …
Browse files Browse the repository at this point in the history
…Test (#456)

I created a Cilium Network Policy (CNP) and Cilium Clusterwide Network
Policy (CCNP) pipeline that deploys a configurable number of CNPs and
CCNPs whose selectors match the test pods. It also scales the cluster to
a configured number of nodes and pods. The policies contain egress and
ingress rules that reflect customer usage, so the test can measure
control plane resource consumption. The test is built on the existing
service churn pipeline framework.

I created this test to measure Cilium CPU usage, Cilium memory usage,
and API server usage on a 1000-node Cilium cluster with endpoints, as
well as on a 3000-node Cilium cluster with CES enabled. This supports
the upcoming official release of the CNP/CCNP feature.

Here is the link to my pipeline:
https://dev.azure.com/akstelescope/telescope/_build?definitionId=37 and
here is a link to a successful run:
https://dev.azure.com/akstelescope/telescope/_build/results?buildId=9548&view=results

Because I changed files that other pipelines also use, I am also running
the Service Churn and Cluster Churn pipelines to check for regressions:

Regular Pipelines:
- Service Churn:
https://dev.azure.com/akstelescope/telescope/_build/results?buildId=9545&view=results
- Cluster Churn:
https://dev.azure.com/akstelescope/telescope/_build/results?buildId=9546&view=results
  • Loading branch information
karina-ranadive authored Jan 16, 2025
1 parent 0ad5629 commit 90a7647
Show file tree
Hide file tree
Showing 11 changed files with 246 additions and 13 deletions.
19 changes: 19 additions & 0 deletions modules/python/clusterloader2/slo/config/ccnp_template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# CiliumClusterwideNetworkPolicy template used by the CCNP module.
# The policy selects every endpoint labelled group=cnp-ccnp, denies ingress
# from endpoints in the "default" namespace, and allows DNS egress
# (UDP port 53) to in-cluster destinations.
apiVersion: cilium.io/v2
kind: CiliumClusterwideNetworkPolicy
metadata:
  # Cluster-scoped object, so no namespace is set. Name is the per-replica
  # object name that ClusterLoader2 passes to object templates.
  name: {{.Name}}
spec:
  endpointSelector:
    matchLabels:
      group: cnp-ccnp
  ingressDeny:
    - fromEndpoints:
        - matchLabels:
            io.kubernetes.pod.namespace: default
  egress:
    - toPorts:
        - ports:
            # Quoted on purpose: the port must stay a string.
            - port: "53"
              protocol: UDP
      toEntities:
        - cluster
20 changes: 20 additions & 0 deletions modules/python/clusterloader2/slo/config/cnp_template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# CiliumNetworkPolicy template used by the CNP module.
# The policy selects every endpoint labelled group=cnp-ccnp in its namespace,
# denies ingress from endpoints in the "default" namespace, and allows HTTPS
# egress (TCP port 443) to any IPv4 destination.
apiVersion: cilium.io/v2
kind: CiliumNetworkPolicy
metadata:
  # Name is the per-replica object name that ClusterLoader2 passes to object
  # templates.
  name: {{.Name}}
  # This was previously hard-coded to slo-1, the namespace the pods are
  # deployed in, because passing a namespace in from load-config caused an
  # object mismatch error. A fixed value only matches the first namespace,
  # while the CNP module creates policies in namespaces 1 through
  # $namespaces. The per-object Namespace placeholder keeps the manifest
  # namespace equal to the namespace each object is created in.
  namespace: {{.Namespace}}
spec:
  endpointSelector:
    matchLabels:
      group: cnp-ccnp
  ingressDeny:
    - fromEndpoints:
        - matchLabels:
            io.kubernetes.pod.namespace: default
  egress:
    - toPorts:
        - ports:
            # Quoted on purpose: the port must stay a string.
            - port: "443"
              protocol: TCP
      toCIDR:
        - 0.0.0.0/0
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
{{$CpuRequest := DefaultParam .CpuRequest "5m"}}
{{$MemoryRequest := DefaultParam .MemoryRequest "20M"}}
{{$cnp_test:= .cnp_test}}
{{$ccnp_test:= .ccnp_test}}

{{$Image := DefaultParam .Image "mcr.microsoft.com/oss/kubernetes/pause:3.6"}}

Expand Down
66 changes: 65 additions & 1 deletion modules/python/clusterloader2/slo/config/load-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ name: load-config

# Config options for test type
{{$SERVICE_TEST := DefaultParam .CL2_SERVICE_TEST true}}
{{$CNP_TEST := DefaultParam .CL2_CNP_TEST false}}
{{$CCNP_TEST := DefaultParam .CL2_CCNP_TEST false}}

# Config options for test parameters
{{$nodesPerNamespace := DefaultParam .CL2_NODES_PER_NAMESPACE 100}}
Expand Down Expand Up @@ -34,6 +36,11 @@ name: load-config
{{$smallDeploymentPods := SubtractInt $podsPerNamespace (MultiplyInt $bigDeploymentsPerNamespace $BIG_GROUP_SIZE)}}
{{$smallDeploymentsPerNamespace := DivideInt $smallDeploymentPods $SMALL_GROUP_SIZE}}

# CNP & CCNP Test
{{$CNPS_PER_NAMESPACE := DefaultParam .CL2_CNPS_PER_NAMESPACE 0}}
{{$CCNPS := DefaultParam .CL2_CCNPS 0}}
{{$DUALSTACK := DefaultParam .CL2_DUALSTACK false}}

namespace:
number: {{$namespaces}}
prefix: slo
Expand Down Expand Up @@ -85,15 +92,41 @@ steps:
bigServicesPerNamespace: {{$bigDeploymentsPerNamespace}}
{{end}}

{{if $CNP_TEST}}
- module:
path: /modules/ciliumnetworkpolicy.yaml
params:
actionName: "Creating"
namespaces: {{$namespaces}}
Group: {{$groupName}}
cnpsPerNamespace: {{$CNPS_PER_NAMESPACE}}
{{end}}

{{if $CCNP_TEST}}
- module:
path: /modules/ciliumclusternetworkpolicy.yaml
params:
actionName: "Creating"
Group: {{$groupName}}
ccnps: {{$CCNPS}}
{{end}}

- module:
path: /modules/reconcile-objects.yaml
params:
actionName: "create"
namespaces: {{$namespaces}}
tuningSet: DeploymentCreateQps
operationTimeout: {{$operationTimeout}}
{{if or $CCNP_TEST $CNP_TEST}}
bigDeploymentSize: 0
bigDeploymentsPerNamespace: 0
cnp_test: {{$CNP_TEST}}
ccnp_test: {{$CCNP_TEST}}
{{else}}
bigDeploymentSize: {{$BIG_GROUP_SIZE}}
bigDeploymentsPerNamespace: {{$bigDeploymentsPerNamespace}}
{{end}}
smallDeploymentSize: {{$SMALL_GROUP_SIZE}}
smallDeploymentsPerNamespace: {{$smallDeploymentsPerNamespace}}
CpuRequest: {{$latencyPodCpu}}m
Expand All @@ -108,8 +141,15 @@ steps:
namespaces: {{$namespaces}}
tuningSet: Sequence
operationTimeout: {{$operationTimeout}}
{{if or $CCNP_TEST $CNP_TEST}}
bigDeploymentSize: 0
bigDeploymentsPerNamespace: 0
cnp_test: {{$CNP_TEST}}
ccnp_test: {{$CCNP_TEST}}
{{else}}
bigDeploymentSize: {{$BIG_GROUP_SIZE}}
bigDeploymentsPerNamespace: {{$bigDeploymentsPerNamespace}}
{{end}}
smallDeploymentSize: {{$SMALL_GROUP_SIZE}}
smallDeploymentsPerNamespace: {{$smallDeploymentsPerNamespace}}
CpuRequest: {{$latencyPodCpu}}m
Expand All @@ -124,13 +164,20 @@ steps:
namespaces: {{$namespaces}}
tuningSet: DeploymentDeleteQps
operationTimeout: {{$operationTimeout}}
{{if or $CCNP_TEST $CNP_TEST}}
bigDeploymentSize: 0
bigDeploymentsPerNamespace: 0
cnp_test: {{$CNP_TEST}}
ccnp_test: {{$CCNP_TEST}}
{{else}}
bigDeploymentSize: {{$BIG_GROUP_SIZE}}
bigDeploymentsPerNamespace: 0
{{end}}
smallDeploymentSize: {{$SMALL_GROUP_SIZE}}
smallDeploymentsPerNamespace: 0
deploymentLabel: restart
Group: {{$groupName}}

{{if $SERVICE_TEST}}
- module:
path: /modules/services.yaml
params:
Expand All @@ -139,6 +186,23 @@ steps:
smallServicesPerNamespace: 0
bigServicesPerNamespace: 0
{{end}}
{{if $CNP_TEST}}
- module:
path: /modules/ciliumnetworkpolicy.yaml
params:
actionName: "Deleting"
namespaces: {{$namespaces}}
cnpsPerNamespace: 0
{{end}}
{{if $CCNP_TEST}}
- module:
path: /modules/ciliumclusternetworkpolicy.yaml
params:
actionName: "Deleting"
namespaces: {{$namespaces}}
ccnps: 0
{{end}}
{{end}}

{{if $CILIUM_METRICS_ENABLED}}
- module:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
## CCNP module provides a module for creating / deleting CCNPs.
## It emits a single step with one phase that reconciles the number of
## CiliumClusterwideNetworkPolicy objects to the requested count.
## Passing ccnps = 0 sets the replica count to zero, which is how
## load-config requests the "Deleting" action.

## Input params
##   actionName - verb used in the step name (load-config passes
##                "Creating" or "Deleting")
##   ccnps      - desired number of CCNP objects
##   Group      - accepted for parity with the other modules; not referenced
##                in the step below
{{$actionName := .actionName}}
{{$ccnps := .ccnps}}
{{$Group := .Group}}

steps:
  - name: "{{$actionName}} {{$ccnps}} k8s CCNPs"
    phases:
      # CCNPs are cluster-scoped, so the phase has no namespace range and
      # replicasPerNamespace is the total object count.
      - namespaceRange: null
        replicasPerNamespace: {{$ccnps}}
        tuningSet: Sequence
        objectBundle:
          - basename: ccnp
            objectTemplatePath: ccnp_template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
## CNP module provides a module for creating / deleting CNPs.
## It emits a single step with one phase that reconciles the number of
## CiliumNetworkPolicy objects in every test namespace to the requested
## count. Passing cnpsPerNamespace = 0 sets the replica count to zero, which
## is how load-config requests the "Deleting" action.

## Input params
##   actionName       - verb used in the step name (load-config passes
##                      "Creating" or "Deleting")
##   namespaces       - number of test namespaces; the phase covers
##                      namespaces 1 through this value
##   cnpsPerNamespace - desired number of CNP objects in each namespace
##   Group            - accepted for parity with the other modules; not
##                      referenced in the step below
{{$actionName := .actionName}}
{{$namespaces := .namespaces}}
{{$cnpsPerNamespace := .cnpsPerNamespace}}
{{$Group := .Group}}

steps:
  - name: "{{$actionName}} {{$cnpsPerNamespace}} k8s CNPs"
    phases:
      - namespaceRange:
          min: 1
          max: {{$namespaces}}
        replicasPerNamespace: {{$cnpsPerNamespace}}
        tuningSet: Sequence
        objectBundle:
          - basename: cnp
            objectTemplatePath: cnp_template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
{{$smallDeploymentSize := .smallDeploymentSize}}
{{$smallDeploymentsPerNamespace := .smallDeploymentsPerNamespace}}

{{$cnp_test:= .cnp_test}}
{{$ccnp_test:= .ccnp_test}}

steps:
- name: Starting measurement for '{{$actionName}}'
measurements:
Expand Down Expand Up @@ -55,7 +58,12 @@ steps:
objectTemplatePath: deployment_template.yaml
templateFillMap:
Replicas: {{$smallDeploymentSize}}
{{if or $cnp_test $ccnp_test}}
cnp_test: {{$cnp_test}}
ccnp_test: {{$ccnp_test}}
{{else}}
SvcName: small-service
{{end}}
Group: {{.Group}}
deploymentLabel: {{.deploymentLabel}}

Expand Down
58 changes: 50 additions & 8 deletions modules/python/clusterloader2/slo/slo.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,16 @@
}
# TODO: Remove aks once CL2 update provider name to be azure

def calculate_config(cpu_per_node, node_count, provider, service_test):
def calculate_config(cpu_per_node, node_count, max_pods, provider, service_test, cnp_test, ccnp_test):
throughput = 100
nodes_per_namespace = min(node_count, DEFAULT_NODES_PER_NAMESPACE)

pods_per_node = DEFAULT_PODS_PER_NODE
if service_test:
pods_per_node = LOAD_PODS_PER_NODE

if cnp_test or ccnp_test:
pods_per_node = max_pods
# Different cloud has different reserved values and number of daemonsets
# Using the same percentage will lead to incorrect nodes number as the number of nodes grow
# For AWS, see: https://github.com/awslabs/amazon-eks-ami/blob/main/templates/al2/runtime/bootstrap.sh#L290
Expand All @@ -52,10 +54,15 @@ def configure_clusterloader2(
provider,
cilium_enabled,
service_test,
cnp_test,
ccnp_test,
num_cnps,
num_ccnps,
dualstack,
override_file):

steps = node_count // node_per_step
throughput, nodes_per_namespace, pods_per_node, cpu_request = calculate_config(cpu_per_node, node_per_step, provider, service_test)
throughput, nodes_per_namespace, pods_per_node, cpu_request = calculate_config(cpu_per_node, node_per_step, max_pods, provider, service_test, cnp_test, ccnp_test)

with open(override_file, 'w') as file:
file.write(f"CL2_LOAD_TEST_THROUGHPUT: {throughput}\n")
Expand All @@ -81,6 +88,20 @@ def configure_clusterloader2(

if service_test:
file.write("CL2_SERVICE_TEST: true\n")
else:
file.write("CL2_SERVICE_TEST: false\n")

if cnp_test:
file.write("CL2_CNP_TEST: true\n")
file.write(f"CL2_CNPS_PER_NAMESPACE: {num_cnps}\n")
file.write(f"CL2_DUALSTACK: {dualstack}\n")
file.write("CL2_GROUP_NAME: cnp-ccnp\n")

if ccnp_test:
file.write("CL2_CCNP_TEST: true\n")
file.write(f"CL2_CCNPS: {num_ccnps}\n")
file.write(f"CL2_DUALSTACK: {dualstack}\n")
file.write("CL2_GROUP_NAME: cnp-ccnp\n")

with open(override_file, 'r') as file:
print(f"Content of file {override_file}:\n{file.read()}")
Expand Down Expand Up @@ -115,6 +136,11 @@ def collect_clusterloader2(
run_id,
run_url,
service_test,
cnp_test,
ccnp_test,
num_cnps,
num_ccnps,
dualstack,
result_file,
test_type="default_config",
):
Expand All @@ -128,7 +154,7 @@ def collect_clusterloader2(
else:
raise Exception(f"No testsuites found in the report! Raw data: {details}")

_, _, pods_per_node, _ = calculate_config(cpu_per_node, node_count, provider, service_test)
_, _, pods_per_node, _ = calculate_config(cpu_per_node, node_count, max_pods, provider, service_test, cnp_test, ccnp_test)
pod_count = node_count * pods_per_node

# TODO: Expose optional parameter to include test details
Expand Down Expand Up @@ -191,14 +217,22 @@ def main():
parser_configure.add_argument("cpu_per_node", type=int, help="CPU per node")
parser_configure.add_argument("node_count", type=int, help="Number of nodes")
parser_configure.add_argument("node_per_step", type=int, help="Number of nodes per scaling step")
parser_configure.add_argument("max_pods", type=int, help="Maximum number of pods per node")
parser_configure.add_argument("max_pods", type=int, nargs='?', default=0, help="Maximum number of pods per node")
parser_configure.add_argument("repeats", type=int, help="Number of times to repeat the deployment churn")
parser_configure.add_argument("operation_timeout", type=str, help="Timeout before failing the scale up test")
parser_configure.add_argument("provider", type=str, help="Cloud provider name")
parser_configure.add_argument("cilium_enabled", type=eval, choices=[True, False], default=False,
help="Whether cilium is enabled. Must be either True or False")
parser_configure.add_argument("service_test", type=eval, choices=[True, False], default=False,
help="Whether service test is running. Must be either True or False")
parser_configure.add_argument("cnp_test", type=eval, choices=[True, False], nargs='?', default=False,
help="Whether cnp test is running. Must be either True or False")
parser_configure.add_argument("ccnp_test", type=eval, choices=[True, False], nargs='?', default=False,
help="Whether ccnp test is running. Must be either True or False")
parser_configure.add_argument("num_cnps", type=int, nargs='?', default=0, help="Number of cnps")
parser_configure.add_argument("num_ccnps", type=int, nargs='?', default=0, help="Number of ccnps")
parser_configure.add_argument("dualstack", type=eval, choices=[True, False], nargs='?', default=False,
help="Whether cluster is dualstack. Must be either True or False")
parser_configure.add_argument("cl2_override_file", type=str, help="Path to the overrides of CL2 config file")

# Sub-command for validate_clusterloader2
Expand All @@ -219,24 +253,32 @@ def main():
parser_collect = subparsers.add_parser("collect", help="Collect scale up data")
parser_collect.add_argument("cpu_per_node", type=int, help="CPU per node")
parser_collect.add_argument("node_count", type=int, help="Number of nodes")
parser_collect.add_argument("max_pods", type=int, help="Maximum number of pods per node")
parser_collect.add_argument("max_pods", type=int, nargs='?', default=0, help="Maximum number of pods per node")
parser_collect.add_argument("repeats", type=int, help="Number of times to repeat the deployment churn")
parser_collect.add_argument("cl2_report_dir", type=str, help="Path to the CL2 report directory")
parser_collect.add_argument("cloud_info", type=str, help="Cloud information")
parser_collect.add_argument("run_id", type=str, help="Run ID")
parser_collect.add_argument("run_url", type=str, help="Run URL")
parser_collect.add_argument("service_test", type=eval, choices=[True, False], default=False,
help="Whether service test is running. Must be either True or False")
parser_collect.add_argument("cnp_test", type=eval, choices=[True, False], nargs='?', default=False,
help="Whether cnp test is running. Must be either True or False")
parser_collect.add_argument("ccnp_test", type=eval, choices=[True, False], nargs='?', default=False,
help="Whether ccnp test is running. Must be either True or False")
parser_collect.add_argument("num_cnps", type=int, nargs='?', default=0, help="Number of cnps")
parser_collect.add_argument("num_ccnps", type=int, nargs='?', default=0, help="Number of ccnps")
parser_collect.add_argument("dualstack", type=eval, choices=[True, False], nargs='?', default=False,
help="Whether cluster is dualstack. Must be either True or False")
parser_collect.add_argument("result_file", type=str, help="Path to the result file")
parser_collect.add_argument("test_type", type=str, nargs='?', default="default-config",
help="Description of test type")

args = parser.parse_args()

if args.command == "configure":
configure_clusterloader2(args.cpu_per_node, args.node_count, args.node_per_step, args.max_pods,
args.repeats, args.operation_timeout, args.provider, args.cilium_enabled,
args.service_test, args.cl2_override_file)
args.service_test, args.cnp_test, args.ccnp_test, args.num_cnps, args.num_ccnps, args.dualstack, args.cl2_override_file)
elif args.command == "validate":
validate_clusterloader2(args.node_count, args.operation_timeout)
elif args.command == "execute":
Expand All @@ -245,7 +287,7 @@ def main():
elif args.command == "collect":
collect_clusterloader2(args.cpu_per_node, args.node_count, args.max_pods, args.repeats,
args.cl2_report_dir, args.cloud_info, args.run_id, args.run_url,
args.service_test, args.result_file, args.test_type)
args.service_test, args.cnp_test, args.ccnp_test, args.num_cnps, args.num_ccnps, args.dualstack, args.result_file, args.test_type)

if __name__ == "__main__":
main()
Loading

0 comments on commit 90a7647

Please sign in to comment.