diff --git a/Dockerfile b/Dockerfile
index 4a51d3b..0a4b07f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -14,6 +14,7 @@ RUN apt update && \
     DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends \
     clang-format-10 \
     python3-pymongo \
+    python3-jsonschema \
    python3-pytest \
     python3-pip \
     && rm -rf /var/lib/apt/lists/*
diff --git a/bench_scenarios/basic.json b/bench_scenarios/basic_build.json
similarity index 80%
rename from bench_scenarios/basic.json
rename to bench_scenarios/basic_build.json
index 07f0dcd..721da9d 100644
--- a/bench_scenarios/basic.json
+++ b/bench_scenarios/basic_build.json
@@ -1,21 +1,19 @@
 {
     "db_bench": {
-        "repo_url": "https://github.com/pmem/pmemkv-tools.git",
+        "repo_url": "https://github.com/pmem/pmemkv-bench.git",
         "commit": "HEAD",
-        "env": {
-            "PMEM_IS_PMEM_FORCE": "1"
-        },
-        "params": [
-            "--db=/dev/shm/pmemkv",
-            "--db_size_in_gb=1"
-        ]
+        "env": {}
     },
     "pmemkv": {
         "repo_url": "https://github.com/pmem/pmemkv.git",
         "commit": "HEAD",
         "cmake_params": [
             "-DCMAKE_BUILD_TYPE=Release",
+            "-DENGINE_CMAP=1",
+            "-DENGINE_CSMAP=1",
             "-DENGINE_RADIX=1",
+            "-DENGINE_STREE=1",
+            "-DENGINE_ROBINHOOD=1",
             "-DBUILD_JSON_CONFIG=1",
             "-DCXX_STANDARD=20",
             "-DBUILD_TESTS=OFF",
diff --git a/bench_scenarios/bench.schema.json b/bench_scenarios/bench.schema.json
new file mode 100644
index 0000000..263e422
--- /dev/null
+++ b/bench_scenarios/bench.schema.json
@@ -0,0 +1,33 @@
+{
+    "$schema": "http://json-schema.org/draft-04/schema#",
+    "type": "array",
+    "items": [
+        {
+            "type": "object",
+            "properties": {
+                "env": {
+                    "type": "object",
+                    "description": "Definition of environment variables passed to pmemkv-bench run."
+                },
+                "params": {
+                    "type": "object",
+                    "description": "Parameters passed to pmemkv-bench binary",
+                    "properties": {},
+                    "required": [
+                        "--benchmarks",
+                        "--value_size",
+                        "--threads",
+                        "--engine",
+                        "--num",
+                        "--db",
+                        "--db_size_in_gb"
+                    ]
+                }
+            },
+            "required": [
+                "env",
+                "params"
+            ]
+        }
+    ]
+}
diff --git a/bench_scenarios/pmemkv-bench.schema.json b/bench_scenarios/build.schema.json
similarity index 92%
rename from bench_scenarios/pmemkv-bench.schema.json
rename to bench_scenarios/build.schema.json
index 6a2d10c..2a916e3 100644
--- a/bench_scenarios/pmemkv-bench.schema.json
+++ b/bench_scenarios/build.schema.json
@@ -16,19 +16,11 @@
             "type": "object",
             "description": "Definition of environment variables passed to pmemkv-bench run.",
             "properties": {}
-        },
-        "params": {
-            "description": "Parameters passed to pmemkv-bench binary",
-            "type": "array",
-            "items": {
-                "type": "string"
-            }
         }
     },
     "required": [
         "commit",
         "env",
-        "params",
         "repo_url"
     ]
 },
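With the schemas split this way, build.schema.json validates only the build configuration, while bench.schema.json validates the list of runtime scenarios. As an illustration (not part of the patch), a hand-written scenario file can be pre-checked the same way run_benchmark.py does it; `my_scenarios.json` is a hypothetical user-supplied file:

```python
# Sketch: pre-validate a hand-written scenario list against bench.schema.json,
# mirroring the validation run_benchmark.py performs below.
# "my_scenarios.json" is a hypothetical input, not something added by this patch.
import json
import jsonschema  # provided by the python3-jsonschema package added above

with open("bench_scenarios/bench.schema.json") as schema_file:
    schema = json.load(schema_file)
with open("my_scenarios.json") as scenario_file:
    scenarios = json.load(scenario_file)

try:
    jsonschema.validate(instance=scenarios, schema=schema)
    print("scenario file is valid")
except jsonschema.exceptions.ValidationError as e:
    print(e)
    raise SystemExit(1)
```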
diff --git a/bench_scenarios/generate_obj_based_scope.py b/bench_scenarios/generate_obj_based_scope.py
new file mode 100644
index 0000000..4469f71
--- /dev/null
+++ b/bench_scenarios/generate_obj_based_scope.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+#
+# SPDX-License-Identifier: Apache-2.0
+# Copyright 2021, Intel Corporation

+# This script implements the generate() method, which may be invoked by run_benchmark.py directly
+# or run standalone to print the configuration json (validated against the schema) to stdout.
+# Once generated, the json may be saved and passed to run_benchmark.py as a parameter.
+
+import json
+import itertools
+import jsonschema
+import os
+
+benchmarks = [
+    "fillseq",
+    "fillrandom",
+    "fillseq,readrandom,readrandom",
+    "fillrandom,readrandom,readrandom",
+    "fillseq,readseq,readseq",
+    "fillrandom,readseq,readseq",
+    "fillseq,readwhilewriting",
+    "fillseq,readrandomwriterandom",
+]
+size = [8, 128]
+number_of_elements = 100000000
+
+
+def concurrent_engines():
+
+    number_of_threads = [1, 4, 8, 12, 18, 24]
+    engine = ["cmap", "csmap"]
+
+    result = itertools.product(benchmarks, size, number_of_threads, engine)
+    return list(result)
+
+
+def single_threaded_engines():
+    number_of_threads = [1]
+    engine = ["radix", "stree"]
+    result = itertools.product(benchmarks, size, number_of_threads, engine)
+    return list(result)
+
+
+def generate():
+    benchmarks = concurrent_engines()
+    benchmarks.extend(single_threaded_engines())
+    benchmarks_configuration = []
+    db_path = os.getenv("PMEMKV_BENCH_DB_PATH", "/mnt/pmem0/pmemkv-bench")
+    for benchmark in benchmarks:
+        benchmark_settings = {
+            "env": {
+                "NUMACTL_CPUBIND": f"file:{os.path.dirname(db_path)}",
+            },
+            "params": {
+                "--benchmarks": f"{benchmark[0]}",
+                "--value_size": f"{benchmark[1]}",
+                "--threads": f"{benchmark[2]}",
+                "--engine": f"{benchmark[3]}",
+                "--num": f"{number_of_elements}",
+                "--db": db_path,
+                "--db_size_in_gb": "200",
+            },
+        }
+
+        benchmarks_configuration.append(benchmark_settings)
+
+    return benchmarks_configuration
+
+
+if __name__ == "__main__":
+    output = generate()
+    schema = None
+    with open("bench.schema.json", "r") as schema_file:
+        schema = json.loads(schema_file.read())
+    try:
+        jsonschema.validate(instance=output, schema=schema)
+    except jsonschema.exceptions.ValidationError as e:
+        print(e)
+        exit(1)
+    print(json.dumps(output, indent=4))
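The generator expands the cartesian product of its hard-coded lists: 8 benchmark mixes, 2 value sizes, and either 6 thread counts for the concurrent engines or 1 thread count for the single-threaded ones. A small sketch (not part of the patch) of previewing that output without running any benchmark, loading the module by path the same way load_scenarios() does further down:

```python
# Sketch: load the generator module by file path and inspect its output.
# The path is the file added in this patch; the rest is standard library.
from importlib import util as import_util

spec = import_util.spec_from_file_location(
    "cfg", "bench_scenarios/generate_obj_based_scope.py"
)
cfg = import_util.module_from_spec(spec)
spec.loader.exec_module(cfg)

scenarios = cfg.generate()
# 8 benchmarks * 2 sizes * (6 threads * 2 engines + 1 thread * 2 engines) = 224
print(len(scenarios))
print(scenarios[0]["params"]["--engine"])
```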
diff --git a/run_benchmark.py b/run_benchmark.py
index 1600f5f..5383504 100644
--- a/run_benchmark.py
+++ b/run_benchmark.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 #
 # SPDX-License-Identifier: Apache-2.0
-# Copyright 2020, Intel Corporation
+# Copyright 2020-2021, Intel Corporation
 
 import tempfile
 import os
@@ -12,6 +12,8 @@
 import glob
 import logging
 import sys
 
+from importlib import util as import_util
+from jsonschema import validate
 from pymongo import MongoClient
 import pymongo.errors
@@ -24,11 +26,13 @@
 class Repository:
     def __init__(self, config: dict):
-        self.logger = logging.getLogger(__name__)
+        self.logger = logging.getLogger(type(self).__name__)
 
-        self.directory = tempfile.TemporaryDirectory()
-        self.path = self.directory.name
         self.url = config["repo_url"]
+        self.directory = tempfile.TemporaryDirectory(
+            prefix=self.url.replace("/", ".").replace(":", ".")
+        )
+        self.path = self.directory.name
         self.commit = config["commit"]
         self.clone()
         self.checkout()
 
@@ -61,7 +65,7 @@ def _resolve_sha(self):
 
 class CmakeProject:
     def __init__(self, config: dict, dependencies: list = []):
-        self.logger = logging.getLogger(__name__)
+        self.logger = logging.getLogger(type(self).__name__)
 
         self.repo = Repository(config)
         self.path = self.repo.path
@@ -108,14 +112,13 @@ def build(self):
 
 class DB_bench:
     def __init__(self, config: dict, pmemkv: CmakeProject):
-        self.logger = logging.getLogger(__name__)
+        self.logger = logging.getLogger(type(self).__name__)
 
         self.repo = Repository(config)
         self.path = self.repo.path
         self.pmemkv = pmemkv
         self.run_output = None
         self.env = config["env"]
-        self.benchmark_params = config["params"]
 
     def build(self):
         build_env = {
@@ -133,21 +136,31 @@ def build(self):
             self.logger.error(f"Cannot build benchmark: {e}")
             raise e
 
-    def run(self):
+    def run(self, environ, benchmark_params):
         find_file_path = lambda root_dir, filename: ":".join(
             set(
                 os.path.dirname(x)
                 for x in glob.glob(root_dir + f"/**/{filename}", recursive=True)
             )
         )
-
+        env = {}
+        for d in (self.env, environ):
+            env.update(d)
+        env["PATH"] = self.path + ":" + os.environ["PATH"]
+        env["LD_LIBRARY_PATH"] = find_file_path(self.pmemkv.install_path, "*.so.*")
+        self.logger.debug(f"{env=}")
+        params_list = [f"{key}={benchmark_params[key]}" for key in benchmark_params]
+
+        numa_settings = []
+        if "NUMACTL_CPUBIND" in env:
+            cpubind = env["NUMACTL_CPUBIND"]
+            numa_settings = ["numactl", f"--cpubind={cpubind}"]
+
+        cmd = numa_settings + ["pmemkv_bench"] + params_list
+        self.logger.info(cmd)
         try:
-            env = self.env
-            env["PATH"] = self.path + ":" + os.environ["PATH"]
-            env["LD_LIBRARY_PATH"] = find_file_path(self.pmemkv.install_path, "*.so.*")
-            self.logger.debug(f"{env=}")
             self.run_output = subprocess.run(
-                ["pmemkv_bench"] + self.benchmark_params,
+                cmd,
                 cwd=self.path,
                 env=env,
                 capture_output=True,
@@ -159,6 +172,11 @@ def run(self):
             self.logger.error(f"with error: {e.stderr} ")
             raise e
 
+    def cleanup(self, benchmark_params):
+        db_path = benchmark_params["--db"]
+        subprocess.run(["pmempool", "rm", db_path], cwd=self.path, check=True)
+        self.logger.info(f"{db_path} cleaned")
+
     def get_results(self):
         OutputReader = csv.DictReader(
             self.run_output.stdout.decode("UTF-8").split("\n"), delimiter=","
         )
@@ -167,17 +185,41 @@
 
 
 def upload_to_mongodb(address, port, username, password, db_name, collection, data):
+    logger = logging.getLogger("mongodb")
     client = MongoClient(address, int(port), username=username, password=password)
     with client:
         db = client[db_name]
         collection = db[collection]
-        collection.insert_one(data)
+        result = collection.insert_one(data)
+        logger.info(f"Inserted: {result.inserted_id} into {address}:{port}/{db_name}")
 
 
 def print_results(results_dict):
     print(json.dumps(results_dict, indent=4, sort_keys=True))
 
 
+def load_scenarios(path, schema_path=None):
+    bench_params = None
+    if path.endswith(".py"):
+        spec = import_util.spec_from_file_location("cfg", path)
+        cfg = import_util.module_from_spec(spec)
+        spec.loader.exec_module(cfg)
+        try:
+            bench_params = cfg.generate()
+        except AttributeError:
+            raise AttributeError(
+                f"Cannot execute 'generate' function from user-provided generator script: {path}"
+            )
+    else:
+        with open(path, "r") as config_path:
+            bench_params = json.loads(config_path.read())
+    if schema_path:
+        with open(schema_path, "r") as schema_file:
+            schema = json.loads(schema_file.read())
+        validate(instance=bench_params, schema=schema)
+    return bench_params
+
+
 def main():
     help_msg = """
 Runs pmemkv-bench for pmemkv and libpmemobjcpp defined in configuration json
@@ -233,9 +275,18 @@ def main():
     parser = argparse.ArgumentParser(
         description=help_msg, formatter_class=argparse.RawTextHelpFormatter
     )
-    parser.add_argument("config_path", help="Path to json config file")
+    parser.add_argument(
+        "build_config_path",
+        help="""Path to a json config file or a python script that provides a generate() method.
+This parameter sets the configuration of the build process. Input structure is specified by bench_scenarios/build.schema.json""",
+    )
+    parser.add_argument(
+        "benchmark_config_path",
+        help="""Path to a json config file or a python script that provides a generate() method.
+This parameter sets the configuration of the benchmarking process. Input structure is specified by bench_scenarios/bench.schema.json""",
+    )
     args = parser.parse_args()
-    logger.info(f"{args.config_path=}")
+    logger.info(f"{args.build_config_path=}")
 
     # Setup database
     db_address = db_port = db_user = db_passwd = db_name = db_collection = None
@@ -248,13 +299,21 @@ def main():
         db_collection = os.environ["MONGO_DB_COLLECTION"]
     except KeyError as e:
         logger.warning(
-            f"Environmet variable {e} was not specified, so results cannot be uploaded to the database"
+            f"Environment variable {e} was not specified, so results cannot be uploaded to the database"
         )
 
+    schema_dir = os.path.join(
+        os.path.dirname(os.path.realpath(__file__)), "bench_scenarios"
+    )
-    config = None
-    with open(args.config_path) as config_path:
-        config = json.loads(config_path.read())
-    logger.info(config)
+    config = load_scenarios(
+        args.build_config_path, os.path.join(schema_dir, "build.schema.json")
+    )
+    logger.info(json.dumps(config, indent=4))
+
+    bench_params = load_scenarios(
+        args.benchmark_config_path, os.path.join(schema_dir, "bench.schema.json")
+    )
+    logger.info(json.dumps(bench_params, indent=4))
 
     libpmemobjcpp = CmakeProject(config["libpmemobjcpp"])
     libpmemobjcpp.build()
@@ -265,19 +324,31 @@ def main():
     benchmark = DB_bench(config["db_bench"], pmemkv)
 
     benchmark.build()
-    benchmark.run()
-    benchmark_results = benchmark.get_results()
-
-    report = {key: config[key] for key in config}
-    report["results"] = benchmark_results
-
-    print_results(report)
-    if db_address and db_port and db_user and db_passwd and db_name and db_collection:
-        upload_to_mongodb(
-            db_address, db_port, db_user, db_passwd, db_name, db_collection, report
-        )
-    else:
-        logger.warning("Results not uploaded to database")
+    for test_case in bench_params:
+        logger.info(f"Running: {test_case}")
+        benchmark.run(test_case["env"], test_case["params"])
+        benchmark.cleanup(test_case["params"])
+        benchmark_results = benchmark.get_results()
+
+        report = {}
+        report["build_configuration"] = config
+        report["runtime_parameters"] = test_case
+        report["results"] = benchmark_results
+
+        print_results(report)
+        if (
+            db_address
+            and db_port
+            and db_user
+            and db_passwd
+            and db_name
+            and db_collection
+        ):
+            upload_to_mongodb(
+                db_address, db_port, db_user, db_passwd, db_name, db_collection, report
+            )
+        else:
+            logger.warning("Results not uploaded to database")
 
 
 if __name__ == "__main__":
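The heart of the run() change is how the command line is assembled: per-scenario env entries override the build-config env, the params dict becomes `--key=value` strings, and NUMACTL_CPUBIND, when present, prepends a numactl prefix. A self-contained restatement of that logic for readers skimming the hunk (`compose_cmd` is a name invented here, not part of the patch):

```python
# Sketch: the command-assembly logic from DB_bench.run(), extracted into a
# free function for illustration only.
def compose_cmd(build_env: dict, scenario_env: dict, benchmark_params: dict) -> list:
    env = {}
    for d in (build_env, scenario_env):
        env.update(d)  # scenario settings win over build-config settings
    params_list = [f"{key}={benchmark_params[key]}" for key in benchmark_params]
    numa_settings = []
    if "NUMACTL_CPUBIND" in env:
        numa_settings = ["numactl", f"--cpubind={env['NUMACTL_CPUBIND']}"]
    return numa_settings + ["pmemkv_bench"] + params_list

print(compose_cmd({}, {"NUMACTL_CPUBIND": "file:/mnt/pmem0"},
                  {"--engine": "cmap", "--threads": "4"}))
# ['numactl', '--cpubind=file:/mnt/pmem0', 'pmemkv_bench', '--engine=cmap', '--threads=4']
```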
diff --git a/tests/test.py b/tests/test.py
index ca168cd..4294313 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -1,3 +1,8 @@
+#!/usr/bin/env python3
+#
+# SPDX-License-Identifier: Apache-2.0
+# Copyright 2021, Intel Corporation
+
 import os, sys
 import pytest
 import json
@@ -8,7 +13,15 @@
 import run_benchmark as rb
 
 
+def create_config_file(configuration):
+    tf = tempfile.NamedTemporaryFile(suffix=".json", mode="w", delete=False)
+    json.dump(configuration, tf)
+    tf.close()
+    return tf
+
+
 def test_help():
+    """Simple sanity check for the -h option of run_benchmark.py."""
     sys.argv = ["dummy.py", "-h"]
     with pytest.raises(SystemExit) as e:
         result = rb.main()
@@ -17,23 +30,26 @@ def test_help():
 
 
 def test_json():
-    test_configuration = {
+    """Basic integration test for run_benchmark.py. It runs the full
+    benchmarking process for arbitrarily chosen parameters.
+    """
+
+    build_configuration = {
         "db_bench": {
-            "repo_url": f"{project_path}",
+            "repo_url": project_path,
             "commit": "HEAD",
-            "env": {"PMEM_IS_PMEM_FORCE": "1"},
-            "params": [
-                f"--db={os.getenv('TEST_PATH', '/dev/shm')}",
-                "--db_size_in_gb=1",
-                "--num=100",
-            ],
+            "env": {},
         },
         "pmemkv": {
             "repo_url": "https://github.com/pmem/pmemkv.git",
             "commit": "HEAD",
             "cmake_params": [
                 "-DCMAKE_BUILD_TYPE=Release",
+                "-DENGINE_CMAP=1",
+                "-DENGINE_CSMAP=1",
                 "-DENGINE_RADIX=1",
+                "-DENGINE_STREE=1",
+                "-DENGINE_ROBINHOOD=1",
                 "-DBUILD_JSON_CONFIG=1",
                 "-DCXX_STANDARD=20",
                 "-DBUILD_TESTS=OFF",
@@ -55,11 +71,37 @@ def test_json():
             "env": {"CC": "gcc", "CXX": "g++"},
         },
     }
-    tf = tempfile.NamedTemporaryFile(suffix=".json", mode="w", delete=False)
-    json.dump(test_configuration, tf)
-    print(tf.name)
-    sys.argv = ["dummy.py", tf.name]
-    tf.close()
+
+    benchmark_configuration = [
+        {
+            "env": {"PMEM_IS_PMEM_FORCE": "1"},
+            "params": {
+                "--db": os.getenv("TEST_PATH", "/dev/shm/pmemkv"),
+                "--db_size_in_gb": "1",
+                "--benchmarks": "fillrandom",
+                "--engine": "cmap",
+                "--num": "100",
+                "--value_size": "8",
+                "--threads": "2",
+            },
+        },
+        {
+            "env": {},
+            "params": {
+                "--db": os.getenv("TEST_PATH", "/dev/shm/pmemkv"),
+                "--db_size_in_gb": "2",
+                "--benchmarks": "fillseq",
+                "--engine": "radix",
+                "--num": "100",
+                "--value_size": "1024",
+                "--threads": "1",
+            },
+        },
+    ]
+
+    build_config_file = create_config_file(build_configuration)
+    test_config_file = create_config_file(benchmark_configuration)
+    sys.argv = ["dummy.py", build_config_file.name, test_config_file.name]
     try:
         result = rb.main()
     except Exception as e:
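The updated integration test still runs under pytest. One way to drive it (an example invocation, not part of the patch), with TEST_PATH as the optional override the test reads via os.getenv():

```python
# Sketch: run the integration test programmatically, equivalent to
# "python3 -m pytest -v tests/test.py". TEST_PATH overrides the default
# /dev/shm/pmemkv database location used in the scenarios above.
import os
import sys
import pytest

os.environ.setdefault("TEST_PATH", "/dev/shm/pmemkv")
sys.exit(pytest.main(["-v", "tests/test.py"]))
```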
+ """ + + build_configuration = { "db_bench": { - "repo_url": f"{project_path}", + "repo_url": project_path, "commit": "HEAD", - "env": {"PMEM_IS_PMEM_FORCE": "1"}, - "params": [ - f"--db={os.getenv('TEST_PATH', '/dev/shm')}", - "--db_size_in_gb=1", - "--num=100", - ], + "env": {}, }, "pmemkv": { "repo_url": "https://github.com/pmem/pmemkv.git", "commit": "HEAD", "cmake_params": [ "-DCMAKE_BUILD_TYPE=Release", + "-DENGINE_CMAP=1", + "-DENGINE_CSMAP=1", "-DENGINE_RADIX=1", + "-DENGINE_STREE=1", + "-DENGINE_ROBINHOOD=1", "-DBUILD_JSON_CONFIG=1", "-DCXX_STANDARD=20", "-DBUILD_TESTS=OFF", @@ -55,11 +71,37 @@ def test_json(): "env": {"CC": "gcc", "CXX": "g++"}, }, } - tf = tempfile.NamedTemporaryFile(suffix=".json", mode="w", delete=False) - json.dump(test_configuration, tf) - print(tf.name) - sys.argv = ["dummy.py", tf.name] - tf.close() + + benchmark_configuration = [ + { + "env": {"PMEM_IS_PMEM_FORCE": "1"}, + "params": { + "--db": os.getenv("TEST_PATH", "/dev/shm/pmemkv"), + "--db_size_in_gb": "1", + "--benchmarks": "fillrandom", + "--engine": "cmap", + "--num": "100", + "--value_size": "8", + "--threads": "2", + }, + }, + { + "env": {}, + "params": { + "--db": os.getenv("TEST_PATH", "/dev/shm/pmemkv"), + "--db_size_in_gb": "2", + "--benchmarks": "fillseq", + "--engine": "radix", + "--num": "100", + "--value_size": "1024", + "--threads": "1", + }, + }, + ] + + build_config_file = create_config_file(build_configuration) + test_config_file = create_config_file(benchmark_configuration) + sys.argv = ["dummy.py", build_config_file.name, test_config_file.name] try: result = rb.main() except Exception as e:
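Taken together, run_benchmark.py now takes two positional arguments: a build configuration (json) and a benchmark configuration (json or a generator script). A usage sketch, assuming basic_build.json carries the db_bench, pmemkv, and libpmemobjcpp sections and that no MONGO_* variables are set, so results upload is skipped:

```python
# Sketch: end-to-end invocation of the new two-argument interface.
import subprocess

subprocess.run(
    [
        "python3",
        "run_benchmark.py",
        "bench_scenarios/basic_build.json",            # build configuration
        "bench_scenarios/generate_obj_based_scope.py", # runtime scenarios
    ],
    check=True,
)
```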