Skip to content

Commit

Permalink
Add commands into stability/library (#12823)
Browse files Browse the repository at this point in the history
  • Loading branch information
maximyurchuk authored Dec 20, 2024
1 parent bc46b2c commit 2be2ee9
Showing 1 changed file with 68 additions and 31 deletions.
99 changes: 68 additions & 31 deletions ydb/tests/stability/library/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,7 @@
logging.getLogger().setLevel(logging.DEBUG)
logging.getLogger().addHandler(logging.StreamHandler(sys.stderr))

from ydb.tests.library.common.composite_assert import CompositeAssert # noqa
from ydb.tests.library.harness.kikimr_cluster import ExternalKiKiMRCluster # noqa
from ydb.tests.library.matchers.collection import is_empty # noqa
from ydb.tests.library.wardens.factories import safety_warden_factory, liveness_warden_factory # noqa

logger = logging.getLogger('ydb.connection')
Expand Down Expand Up @@ -61,6 +59,27 @@ def _unpack_resource(self, name):
os.chmod(path_to_unpack, st.st_mode | stat.S_IEXEC)
return path_to_unpack

def perform_checks(self):
safety_violations = safety_warden_factory(self.kikimr_cluster, self.ssh_username).list_of_safety_violations()
liveness_violations = liveness_warden_factory(self.kikimr_cluster, self.ssh_username).list_of_liveness_violations

count = 0
report = []

print("SAFETY WARDEN (total: {})".format(len(safety_violations)))
for i, violation in enumerate(safety_violations):
print("[{}]".format(i))
print(violation)
print()

print("LIVENESS WARDEN (total: {})".format(len(liveness_violations)))
for i, violation in enumerate(liveness_violations):
print("[{}]".format(i))
print(violation)

print()
return count, "\n".join(report)

def start_nemesis(self):
for node in self.kikimr_cluster.nodes.values():
node.ssh_command("sudo service nemesis restart", raise_on_error=True)
Expand All @@ -69,18 +88,18 @@ def stop_nemesis(self):
for node in self.kikimr_cluster.nodes.values():
node.ssh_command("sudo service nemesis stop", raise_on_error=False)

def setup(self, is_deploy_cluster):
def deploy_ydb(self):
self._stop_nemesis()
self.kikimr_cluster.start()

if is_deploy_cluster:
# cleanup nemesis logs
for node in self.kikimr_cluster.nodes.values():
node.ssh_command('sudo rm -rf /Berkanavt/nemesis/logs/*', raise_on_error=False)
node.ssh_command('sudo pkill screen', raise_on_error=False)
# cleanup nemesis logs
for node in self.kikimr_cluster.nodes.values():
node.ssh_command('sudo rm -rf /Berkanavt/nemesis/logs/*', raise_on_error=False)
node.ssh_command('sudo pkill screen', raise_on_error=False)

with open(self._unpack_resource("tbl_profile.txt")) as f:
self.kikimr_cluster.client.console_request(f.read())

with open(self._unpack_resource("tbl_profile.txt")) as f:
self.kikimr_cluster.client.console_request(f.read())
self.kikimr_cluster.client.update_self_heal(True)

node = list(self.kikimr_cluster.nodes.values())[0]
Expand All @@ -99,6 +118,20 @@ def setup(self, is_deploy_cluster):
)
)

def deploy_tools(self):
for node in self.kikimr_cluster.nodes.values():
node.ssh_command(["sudo", "mkdir", "-p", STRESS_BINARIES_DEPLOY_PATH], raise_on_error=False)
for artifact in self.artifacts:
node.copy_file_or_dir(
artifact,
os.path.join(
STRESS_BINARIES_DEPLOY_PATH,
os.path.basename(
artifact
)
)
)


def path_type(path):
# Expand the user's home directory if ~ is present
Expand Down Expand Up @@ -137,9 +170,13 @@ def parse_args():
type=str,
nargs="+",
choices=[
"deploy_ydb",
"deploy_tools",
"start_nemesis",
"stop_nemesis",
"start_workload_simple_queue",
"start_workload_simple_queue_row",
"start_workload_simple_queue_column",
"start_workload_olap_workload",
"stop_workload",
"perform_checks",
],
Expand All @@ -158,14 +195,26 @@ def main():
)

for action in args.actions:
setup_command = False
if setup_command:
stability_cluster.setup(True)

if action == "start_workload_simple_queue":
if action == "deploy_ydb":
stability_cluster.deploy_ydb()
if action == "deploy_tools":
stability_cluster.deploy_tools()
if action == "start_workload_simple_queue_row":
for node_id, node in enumerate(stability_cluster.kikimr_cluster.nodes.values()):
node.ssh_command(
'screen -d -m bash -c "while true; do /Berkanavt/nemesis/bin/simple_queue --database /Root/db1 --mode row; done"',
raise_on_error=True
)
if action == "start_workload_simple_queue_column":
for node_id, node in enumerate(stability_cluster.kikimr_cluster.nodes.values()):
node.ssh_command(
'screen -d -m bash -c "while true; do /Berkanavt/nemesis/bin/simple_queue --database /Root/db1 --mode column; done"',
raise_on_error=True
)
if action == "start_workload_olap_workload":
for node_id, node in enumerate(stability_cluster.kikimr_cluster.nodes.values()):
node.ssh_command(
'screen -d -m bash -c "while true; do /Berkanavt/nemesis/bin/simple_queue --database /Root/db1 ; done"',
'screen -d -m bash -c "while true; do /Berkanavt/nemesis/bin/olap_workload --database /Root/db1 --mode column; done"',
raise_on_error=True
)
if action == "stop_workload":
Expand All @@ -182,20 +231,8 @@ def main():
stability_cluster.start_nemesis()

if action == "perform_checks":
composite_assert = CompositeAssert()
composite_assert.assert_that(
safety_warden_factory(stability_cluster.kikimr_cluster, ssh_username).list_of_safety_violations(),
is_empty(),
"No safety violations by Safety Warden"
)

composite_assert.assert_that(
liveness_warden_factory(stability_cluster.kikimr_cluster, ssh_username).list_of_liveness_violations,
is_empty(),
"No liveness violations by liveness warden",
)

composite_assert.finish()
count, report = stability_cluster.perform_checks()
print(report)


if __name__ == "__main__":
Expand Down

0 comments on commit 2be2ee9

Please sign in to comment.