Skip to content

Commit

Permalink
(feat)experiments: Include Kafka network chaos experiments (#997)
Browse files Browse the repository at this point in the history
* (feat)introduce kafka network experiments

Signed-off-by: ksatchit <[email protected]>
  • Loading branch information
Karthik Satchitanand committed Dec 13, 2019
1 parent b9523b2 commit 62ffe91
Show file tree
Hide file tree
Showing 13 changed files with 520 additions and 10 deletions.
6 changes: 3 additions & 3 deletions chaoslib/pumba/network_chaos/network_chaos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,9 @@
kubectl delete -f /chaoslib/pumba/network_chaos/pumba_kube.yml -n {{ a_ns }}
args:
executable: /bin/bash
when: pumba_deploy_result.rc == 0

- name: Confirm that the pumba job is deleted successfully
- name: Confirm that the pumba job is not present
shell: >
kubectl get pods -l job-name=pumba-netem-{{ run_id }} --no-headers -n {{ a_ns }}
args:
Expand All @@ -103,5 +104,4 @@
until: "'No resources found' in result.stderr"
delay: 5
retries: 60

when: "pumba_deploy_result.rc is defined and pumba_deploy_result.rc == 0"
when: "pumba_deploy_result is defined"
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ spec:

# provide lib image
- name: LIB_IMAGE
value: 'gaiaadm/pumba:0.4.8'
value: 'gaiaadm/pumba:0.6.5'

- name: NETWORK_LATENCY
value: '60000' # in ms
Expand Down
4 changes: 4 additions & 0 deletions experiments/kafka/kafka-broker-network-latency/chaosutil.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{# Emit the chaos lib task-file path as `c_util` only when c_lib is 'pumba'. #}
{% if (c_lib | default('')) == 'pumba' %}
c_util: /chaoslib/pumba/network_chaos/network_chaos.yml
{% endif %}

Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
---
# Chaos experiment playbook: kafka-broker-network-latency.
#
# Every step runs inside a single block so that any failure falls through to
# `rescue`:
#   1. run the prerequisites task file and load the rendered chaosutil.yml
#   2. record start-of-test (SOT) in the chaos result resource
#   3. verify Kafka cluster health
#   4. run the task file named by `kafka_broker_util`
#   5. run the chaos lib task file named by `c_util`
#   6. verify cluster health again, then check and clean up the liveness pod
#      when KAFKA_LIVENESS_STREAM is 'enabled'
# `flag` is set to "pass" (end of block) or "fail" (rescue) before the
# end-of-test (EOT) update, which runs in `always`.
- hosts: localhost
  connection: local

  # All inputs are read from environment variables of the process running
  # this playbook.
  vars:
    c_experiment: "kafka-broker-network-latency"
    c_duration: "{{ lookup('env','TOTAL_CHAOS_DURATION') }}"
    c_container: "{{ lookup('env','TARGET_CONTAINER') }}"
    n_latency: "{{ lookup('env','NETWORK_LATENCY') }}"
    n_interface: "{{ lookup('env','NETWORK_INTERFACE') }}"
    c_lib: "{{ lookup('env','LIB') }}"
    lib_image: "{{ lookup('env','LIB_IMAGE') }}"
    kafka_instance: "{{ lookup('env','KAFKA_INSTANCE_NAME') }}"
    kafka_ns: "{{ lookup('env','KAFKA_NAMESPACE') }}"
    kafka_label: "{{ lookup('env','KAFKA_LABEL') }}"
    kafka_kind: "{{ lookup('env','KAFKA_KIND') }}"
    kafka_broker: "{{ lookup('env','KAFKA_BROKER') }}"
    kafka_stream: "{{ lookup('env','KAFKA_LIVENESS_STREAM') }}"
    kafka_liveness_image: "{{ lookup('env','KAFKA_LIVENESS_IMAGE') }}"
    kafka_consumer_timeout: "{{ lookup('env','KAFKA_CONSUMER_TIMEOUT') }}"
    kafka_service: "{{ lookup('env','KAFKA_SERVICE') }}"
    kafka_port: "{{ lookup('env','KAFKA_PORT') }}"
    kafka_replication_factor: "{{ lookup('env','KAFKA_REPLICATION_FACTOR') }}"
    zk_ns: "{{ lookup('env','ZOOKEEPER_NAMESPACE') }}"
    zk_label: "{{ lookup('env','ZOOKEEPER_LABEL') }}"
    zk_service: "{{ lookup('env','ZOOKEEPER_SERVICE') }}"
    zk_port: "{{ lookup('env','ZOOKEEPER_PORT') }}"

  tasks:
    - block:

        # `include_tasks` replaces the deprecated bare `include`, matching
        # every other include in this play.
        - include_tasks: kafka-broker-network-latency-ansible-prerequisites.yml

        # Loads the variables written to chaosutil.yml (expected to carry
        # `c_util`, which is consumed by the fault-injection step below).
        - include_vars:
            file: chaosutil.yml

        ## GENERATE EXP RESULT NAME
        # Prefix the experiment name with the chaos engine name when
        # CHAOSENGINE is set to a non-empty value.
        - block:

            - name: Construct chaos result name (experiment_name)
              set_fact:
                c_experiment: "{{ lookup('env','CHAOSENGINE') }}-{{ c_experiment }}"

          when: lookup('env','CHAOSENGINE')

        ## RECORD START-OF-EXPERIMENT IN LITMUSCHAOS RESULT CR
        - include_tasks: /utils/runtime/update_chaos_result_resource.yml
          vars:
            status: 'SOT'
            namespace: "{{ kafka_ns }}"

        ## PRE-CHAOS APPLICATION LIVENESS CHECK

        - name: Verify that the Kafka cluster is healthy
          include_tasks: "/utils/apps/kafka/kafka_cluster_health.yml"
          vars:
            delay: 1
            retries: 60

        ## SETUP KAFKA CHAOS INFRA AND DERIVE BROKERS UNDER TEST

        # `kafka_broker_util` is not defined in this play's vars; it must be
        # set as a fact before this point (the prerequisites include is the
        # only earlier candidate).
        - include_tasks: "{{ kafka_broker_util }}"

        ## FAULT INJECTION

        - include_tasks: "{{ c_util }}"
          vars:
            a_ns: "{{ kafka_ns }}"
            a_label: "{{ kafka_label }}"
            app_ns: "{{ kafka_ns }}"

            # derived from the 'kafka_broker_util' task
            app_pod: "{{ kafka_broker }}"

        ## POST-CHAOS APPLICATION LIVENESS CHECK

        - name: Verify that the Kafka cluster is healthy
          include_tasks: "/utils/apps/kafka/kafka_cluster_health.yml"
          vars:
            delay: 1
            retries: 60

        ## CHECK FOR KAFKA LIVENESS & CLEANUP

        # Only relevant when the liveness stream was requested.
        - block:

            - name: Verify that the Kafka liveness pod (pub-sub) is uninterrupted
              include_tasks: "/utils/common/status_app_pod.yml"
              vars:
                a_ns: "{{ kafka_ns }}"
                a_label: "name=kafka-liveness"
                delay: 1
                retries: 60

            - include_tasks: "/utils/apps/kafka/kafka_liveness_cleanup.yml"

          when: kafka_stream is defined and kafka_stream == 'enabled'

        # Reached only if every task above succeeded.
        - set_fact:
            flag: "pass"

      rescue:
        - set_fact:
            flag: "fail"

        # Best-effort cleanup: runs whether or not the liveness stream was
        # enabled, hence errors are ignored.
        - name: Cleanup kafka liveness pods if present
          include_tasks: "/utils/apps/kafka/kafka_liveness_cleanup.yml"
          ignore_errors: true

      always:

        ## RECORD END-OF-TEST IN LITMUSCHAOS RESULT CR
        - include_tasks: /utils/runtime/update_chaos_result_resource.yml
          vars:
            status: 'EOT'
            namespace: "{{ kafka_ns }}"

Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
---
# Prerequisite tasks (presumably the file included at the start of the
# experiment playbook -- confirm against the repository layout):
#   * render chaosutil.j2 into chaosutil.yml
#   * set `kafka_broker_util` to the task file to run next, chosen from
#     whether a broker was supplied (`kafka_broker`) and whether the
#     liveness stream is enabled (`kafka_stream`)
- name: Identify the chaos util to be invoked
  template:
    src: chaosutil.j2
    dest: chaosutil.yml

# A target broker was supplied (kafka_broker is non-empty).
- block:

    - set_fact:
        kafka_broker_util: "/utils/apps/kafka/kafka_liveness_stream.yml"
      when: kafka_stream is defined and kafka_stream == 'enabled'

    # Any value other than 'enabled' (undefined, '', 'disabled', or an
    # unexpected string) takes this branch, so kafka_broker_util is always
    # defined when the block runs.
    - set_fact:
        kafka_broker_util: "/utils/apps/kafka/display_kafka_broker_info.yml"
      when: kafka_stream is not defined or kafka_stream != 'enabled'

  when: kafka_broker is defined and kafka_broker != ''

# No target broker was supplied (kafka_broker is undefined or empty).
- block:

    - set_fact:
        kafka_broker_util: "/utils/apps/kafka/kafka_launch_stream_derive_leader_broker.yml"
      when: kafka_stream is defined and kafka_stream == 'enabled'

    # Same catch-all as above: every non-'enabled' value selects this util.
    - set_fact:
        kafka_broker_util: "/utils/apps/kafka/kafka_select_broker.yml"
      when: kafka_stream is not defined or kafka_stream != 'enabled'

  when: kafka_broker is not defined or kafka_broker == ''



Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
---
# Kubernetes Job that runs the kafka-broker-network-latency playbook in a
# single `ansibletest` container. The experiment is configured entirely via
# the env entries below; empty values are expected to be filled in by
# whoever launches the job.
apiVersion: batch/v1
kind: Job
metadata:
  generateName: kafka-broker-network-latency-
spec:
  template:
    metadata:
      labels:
        experiment: kafka-broker-network-latency
    spec:
      # Placeholder that is updated by the executor for automated runs
      # Provide appropriate SA (with desired permissions) if executed manually
      # Quoted because a plain YAML scalar may not begin with '%'; a textual
      # substitution of the token still leaves a valid quoted string.
      serviceAccountName: '%CHAOS_SERVICE_ACCOUNT%'
      restartPolicy: Never
      containers:
        - name: ansibletest
          image: litmuschaos/ansible-runner:ci
          imagePullPolicy: Always
          env:
            - name: ANSIBLE_STDOUT_CALLBACK
              value: 'default'

            - name: KAFKA_KIND
              value: 'statefulset'

            - name: KAFKA_LIVENESS_STREAM
              value: 'enabled'

            - name: KAFKA_LIVENESS_IMAGE
              value: 'litmuschaos/kafka-client:ci'

            # in milliseconds
            - name: KAFKA_CONSUMER_TIMEOUT
              value: '30000'

            # in milliseconds
            - name: TOTAL_CHAOS_DURATION
              value: '60000'

            - name: TARGET_CONTAINER
              value: 'k8skafka'

            # in milliseconds
            - name: NETWORK_LATENCY
              value: '60000'

            - name: NETWORK_INTERFACE
              value: 'eth0'

            - name: KAFKA_INSTANCE_NAME
              value: ''

            - name: KAFKA_NAMESPACE
              value: ''

            - name: KAFKA_LABEL
              value: ''

            - name: KAFKA_BROKER
              value: ''

            - name: KAFKA_REPLICATION_FACTOR
              value: ''

            - name: KAFKA_SERVICE
              value: ''

            - name: KAFKA_PORT
              value: ''

            - name: ZOOKEEPER_NAMESPACE
              value: ''

            - name: ZOOKEEPER_LABEL
              value: ''

            - name: ZOOKEEPER_SERVICE
              value: ''

            - name: ZOOKEEPER_PORT
              value: ''

            ## env var that describes the library used to execute the chaos
            ## supported values: pumba
            - name: LIB
              value: 'pumba'

            - name: LIB_IMAGE
              value: 'gaiaadm/pumba:0.6.5'

            - name: CHAOSENGINE
              value: ''

            # Exposes the pod's own service account name to the container.
            - name: CHAOS_SERVICE_ACCOUNT
              valueFrom:
                fieldRef:
                  fieldPath: spec.serviceAccountName

          command: ["/bin/bash"]
          # The trailing `exit 0` makes the container exit successfully
          # regardless of the playbook's return code.
          args: ["-c", "ansible-playbook ./experiments/kafka/kafka-broker-network-latency/kafka-broker-network-latency-ansible-logic.yml -vv -i /etc/ansible/hosts; exit 0"]

4 changes: 4 additions & 0 deletions experiments/kafka/kafka-broker-network-loss/chaosutil.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{# Render the `c_util` entry (path of the chaos lib task file) only for the pumba lib. #}
{% if (c_lib | default('')) == 'pumba' %}
c_util: /chaoslib/pumba/network_chaos/network_chaos.yml
{% endif %}

Loading

0 comments on commit 62ffe91

Please sign in to comment.