From 35e9fb6ee2e025832e445681f95ea346fe350a5b Mon Sep 17 00:00:00 2001 From: Anish Palakurthi Date: Fri, 1 Nov 2024 01:00:13 +0000 Subject: [PATCH 1/5] adding mongodb --- Makefile | 5 ++ .../kernmlops/kernmlops_benchmark/__init__.py | 2 + python/kernmlops/kernmlops_benchmark/gap.py | 2 + .../kernmlops/kernmlops_benchmark/mongodb.py | 88 +++++++++++++++++++ 4 files changed, 97 insertions(+) create mode 100644 python/kernmlops/kernmlops_benchmark/mongodb.py diff --git a/Makefile b/Makefile index 1a59f0f..3bbb285 100644 --- a/Makefile +++ b/Makefile @@ -106,6 +106,11 @@ benchmark-gap: -c ${KERNMLOPS_CONFIG_FILE} \ --benchmark gap +benchmark-mongodb: + @python python/kernmlops collect -v \ + -c ${KERNMLOPS_CONFIG_FILE} \ + --benchmark mongodb + benchmark-linux-build: @python python/kernmlops collect -v \ -c ${KERNMLOPS_CONFIG_FILE} \ diff --git a/python/kernmlops/kernmlops_benchmark/__init__.py b/python/kernmlops/kernmlops_benchmark/__init__.py index ef8c3ef..058cb4a 100644 --- a/python/kernmlops/kernmlops_benchmark/__init__.py +++ b/python/kernmlops/kernmlops_benchmark/__init__.py @@ -13,6 +13,7 @@ BenchmarkRunningError, ) from kernmlops_benchmark.gap import GapBenchmark +from kernmlops_benchmark.mongodb import MongoDbBenchmark from kernmlops_benchmark.linux_build import LinuxBuildBenchmark from kernmlops_config import ConfigBase @@ -20,6 +21,7 @@ FauxBenchmark.name(): FauxBenchmark, LinuxBuildBenchmark.name(): LinuxBuildBenchmark, GapBenchmark.name(): GapBenchmark, + MongoDbBenchmark.name(): MongoDbBenchmark, } BenchmarkConfig = make_dataclass( diff --git a/python/kernmlops/kernmlops_benchmark/gap.py b/python/kernmlops/kernmlops_benchmark/gap.py index d61f04e..b7e6c6b 100644 --- a/python/kernmlops/kernmlops_benchmark/gap.py +++ b/python/kernmlops/kernmlops_benchmark/gap.py @@ -51,6 +51,8 @@ def setup(self) -> None: def run(self) -> None: if self.process is not None: raise BenchmarkRunningError() + + print(str(self.benchmark_dir / self.config.gap_benchmark)) 
self.process = subprocess.Popen( [ str(self.benchmark_dir / self.config.gap_benchmark), diff --git a/python/kernmlops/kernmlops_benchmark/mongodb.py b/python/kernmlops/kernmlops_benchmark/mongodb.py new file mode 100644 index 0000000..0b3c53d --- /dev/null +++ b/python/kernmlops/kernmlops_benchmark/mongodb.py @@ -0,0 +1,88 @@ +import subprocess +from dataclasses import dataclass +from typing import Literal, cast + +from data_schema import GraphEngine, demote +from kernmlops_benchmark.benchmark import Benchmark, GenericBenchmarkConfig +from kernmlops_benchmark.errors import ( + BenchmarkNotInCollectionData, + BenchmarkNotRunningError, + BenchmarkRunningError, +) +from kernmlops_config import ConfigBase + + +@dataclass(frozen=True) +class BenchmarkConfig(ConfigBase): + + recordCount: int = 1000000 + readProportion: float = 0.25 + updateProportion: float =0.75 + + +class MongoDbBenchmark(Benchmark): + + @classmethod + def name(cls) -> str: + return "mongodb" + + @classmethod + def default_config(cls) -> ConfigBase: + return BenchmarkConfig() + + @classmethod + def from_config(cls, config: ConfigBase) -> "Benchmark": + generic_config = cast(GenericBenchmarkConfig, getattr(config, "generic")) + gap_config = cast(BenchmarkConfig, getattr(config, cls.name())) + return MongoDbBenchmark(generic_config=generic_config, config=gap_config) + + def __init__(self, *, generic_config: GenericBenchmarkConfig, config: BenchmarkConfig): + self.generic_config = generic_config + self.config = config + self.benchmark_dir = self.generic_config.get_benchmark_dir() / self.name() + self.process: subprocess.Popen | None = None + + def is_configured(self) -> bool: + return self.benchmark_dir.is_dir() + + def setup(self) -> None: + if self.process is not None: + raise BenchmarkRunningError() + self.generic_config.generic_setup() + + def run(self) -> None: + if self.process is not None: + raise BenchmarkRunningError() + + bash_file_path = self.benchmark_dir / "../run_mongodb.sh" # Add the path 
to your bash file here + print(bash_file_path) + self.process = subprocess.Popen( + [ + "bash", + bash_file_path, + + ], + preexec_fn=demote(), + stdout=subprocess.DEVNULL, + ) + + def poll(self) -> int | None: + if self.process is None: + raise BenchmarkNotRunningError() + return self.process.poll() + + def wait(self) -> None: + if self.process is None: + raise BenchmarkNotRunningError() + self.process.wait() + + def kill(self) -> None: + if self.process is None: + raise BenchmarkNotRunningError() + self.process.terminate() + + @classmethod + def plot_events(cls, graph_engine: GraphEngine) -> None: + if graph_engine.collection_data.benchmark != cls.name(): + raise BenchmarkNotInCollectionData() + # TODO(Patrick): plot when a trial starts/ends From 596b185d9a0474a919d435ec6a2112e3296237f5 Mon Sep 17 00:00:00 2001 From: Anish Palakurthi Date: Thu, 7 Nov 2024 23:08:17 +0000 Subject: [PATCH 2/5] need to persist env var, iso'd issue to mdb --- Makefile | 19 + .../kernmlops/kernmlops_benchmark/mongodb.py | 4 +- scripts/setup-benchmarks/install_mongodb.sh | 394 ++++++++++++++++++ scripts/setup-benchmarks/install_ycsb.sh | 361 ++++++++++++++++ 4 files changed, 777 insertions(+), 1 deletion(-) create mode 100644 scripts/setup-benchmarks/install_mongodb.sh create mode 100644 scripts/setup-benchmarks/install_ycsb.sh diff --git a/Makefile b/Makefile index 3bbb285..165c0e5 100644 --- a/Makefile +++ b/Makefile @@ -187,6 +187,25 @@ docker: ${IMAGE_NAME}:${VERSION} \ ${CONTAINER_CMD} || true +# Benchmark Installation commands + +# Must be run before using any ycsb benchmark (once) +install-ycsb: + @echo "Installing ycsb..." + @source scripts/setup-benchmarks/install_ycsb.sh + +# Must be run before using any mongodb benchmark (once) +install-mongodb: + @echo "Installing mongodb benchmark..." 
+ @source scripts/setup-benchmarks/install_mongodb.sh + +# Must be run before using the MongoDB benchmark each time YCSB target benchmark is changed +load-mongodb: + @echo "Running YCSB load..." + ${YCSB_PATH}/bin/ycsb load mongodb -s -P workloads/workloada \ + -p recordcount=1000000 \ + -p mongodb.url=mongodb://localhost:27017/ycsb \ + -p mongodb.writeConcern=acknowledged # Miscellaneous commands clean-docker-images: diff --git a/python/kernmlops/kernmlops_benchmark/mongodb.py b/python/kernmlops/kernmlops_benchmark/mongodb.py index 0b3c53d..e7e91ad 100644 --- a/python/kernmlops/kernmlops_benchmark/mongodb.py +++ b/python/kernmlops/kernmlops_benchmark/mongodb.py @@ -43,6 +43,8 @@ def __init__(self, *, generic_config: GenericBenchmarkConfig, config: BenchmarkC self.process: subprocess.Popen | None = None def is_configured(self) -> bool: + return True + # print(f'is_configured directory name: {self.benchmark_dir}') return self.benchmark_dir.is_dir() def setup(self) -> None: @@ -54,7 +56,7 @@ def run(self) -> None: if self.process is not None: raise BenchmarkRunningError() - bash_file_path = self.benchmark_dir / "../run_mongodb.sh" # Add the path to your bash file here + bash_file_path = self.benchmark_dir / "run_mongodb.sh" # Add the path to your bash file here print(bash_file_path) self.process = subprocess.Popen( [ diff --git a/scripts/setup-benchmarks/install_mongodb.sh b/scripts/setup-benchmarks/install_mongodb.sh new file mode 100644 index 0000000..b02eab2 --- /dev/null +++ b/scripts/setup-benchmarks/install_mongodb.sh @@ -0,0 +1,394 @@ +#!/bin/bash + +# apt-get update && apt-get install -y openjdk-11-jdk && echo 'export PATH=$PATH:/usr/lib/jvm/java-11-openjdk-amd64/bin' >> ~/.bashrc && source ~/.bashrc + +# Update and install required packages +apt update + +# Download YCSB +curl -O --location https://github.com/brianfrankcooper/YCSB/releases/download/0.17.0/ycsb-0.17.0.tar.gz +tar xfvz ycsb-0.17.0.tar.gz +cd ycsb-0.17.0 + +export YCSB_HOME=$(pwd) + + +# 
Replace the bin/ycsb script +# Replace the bin/ycsb script +cat << 'EOF' > bin/ycsb +#!/usr/bin/python3 +# +# Copyright (c) 2012 - 2015 YCSB contributors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. +# + +import errno +import fnmatch +import io +import os +import shlex +import sys +import subprocess +import argparse + +BASE_URL = "https://github.com/brianfrankcooper/YCSB/tree/master/" +COMMANDS = { + "shell" : { + "command" : "", + "description" : "Interactive mode", + "main" : "site.ycsb.CommandLine", + }, + "load" : { + "command" : "-load", + "description" : "Execute the load phase", + "main" : "site.ycsb.Client", + }, + "run" : { + "command" : "-t", + "description" : "Execute the transaction phase", + "main" : "site.ycsb.Client", + }, +} + + +DATABASES = { + "accumulo" : "site.ycsb.db.accumulo.AccumuloClient", + "accumulo1.6" : "site.ycsb.db.accumulo.AccumuloClient", + "accumulo1.7" : "site.ycsb.db.accumulo.AccumuloClient", + "accumulo1.8" : "site.ycsb.db.accumulo.AccumuloClient", + "aerospike" : "site.ycsb.db.AerospikeClient", + "arangodb" : "site.ycsb.db.arangodb.ArangoDBClient", + "arangodb3" : "site.ycsb.db.arangodb.ArangoDBClient", + "asynchbase" : "site.ycsb.db.AsyncHBaseClient", + "azurecosmos" : "site.ycsb.db.AzureCosmosClient", + "azuretablestorage" : "site.ycsb.db.azuretablestorage.AzureClient", + "basic" : "site.ycsb.BasicDB", + "basicts" : "site.ycsb.BasicTSDB", + "cassandra-cql": 
"site.ycsb.db.CassandraCQLClient", + "cassandra2-cql": "site.ycsb.db.CassandraCQLClient", + "cloudspanner" : "site.ycsb.db.cloudspanner.CloudSpannerClient", + "couchbase" : "site.ycsb.db.CouchbaseClient", + "couchbase2" : "site.ycsb.db.couchbase2.Couchbase2Client", + "crail" : "site.ycsb.db.crail.CrailClient", + "dynamodb" : "site.ycsb.db.DynamoDBClient", + "elasticsearch": "site.ycsb.db.ElasticsearchClient", + "elasticsearch5": "site.ycsb.db.elasticsearch5.ElasticsearchClient", + "elasticsearch5-rest": "site.ycsb.db.elasticsearch5.ElasticsearchRestClient", + "foundationdb" : "site.ycsb.db.foundationdb.FoundationDBClient", + "geode" : "site.ycsb.db.GeodeClient", + "googlebigtable" : "site.ycsb.db.GoogleBigtableClient", + "googledatastore" : "site.ycsb.db.GoogleDatastoreClient", + "griddb" : "site.ycsb.db.griddb.GridDBClient", + "hbase098" : "site.ycsb.db.HBaseClient", + "hbase10" : "site.ycsb.db.HBaseClient10", + "hbase12" : "site.ycsb.db.hbase12.HBaseClient12", + "hbase14" : "site.ycsb.db.hbase14.HBaseClient14", + "hbase20" : "site.ycsb.db.hbase20.HBaseClient20", + "hypertable" : "site.ycsb.db.HypertableClient", + "ignite" : "site.ycsb.db.ignite.IgniteClient", + "ignite-sql" : "site.ycsb.db.ignite.IgniteSqlClient", + "infinispan-cs": "site.ycsb.db.InfinispanRemoteClient", + "infinispan" : "site.ycsb.db.InfinispanClient", + "jdbc" : "site.ycsb.db.JdbcDBClient", + "kudu" : "site.ycsb.db.KuduYCSBClient", + "memcached" : "site.ycsb.db.MemcachedClient", + "maprdb" : "site.ycsb.db.mapr.MapRDBClient", + "maprjsondb" : "site.ycsb.db.mapr.MapRJSONDBClient", + "mongodb" : "site.ycsb.db.MongoDbClient", + "mongodb-async": "site.ycsb.db.AsyncMongoDbClient", + "nosqldb" : "site.ycsb.db.NoSqlDbClient", + "orientdb" : "site.ycsb.db.OrientDBClient", + "postgrenosql" : "site.ycsb.postgrenosql.PostgreNoSQLDBClient", + "rados" : "site.ycsb.db.RadosClient", + "redis" : "site.ycsb.db.RedisClient", + "rest" : "site.ycsb.webservice.rest.RestClient", + "riak" : 
"site.ycsb.db.riak.RiakKVClient", + "rocksdb" : "site.ycsb.db.rocksdb.RocksDBClient", + "s3" : "site.ycsb.db.S3Client", + "solr" : "site.ycsb.db.solr.SolrClient", + "solr6" : "site.ycsb.db.solr6.SolrClient", + "tarantool" : "site.ycsb.db.TarantoolClient", + "tablestore" : "site.ycsb.db.tablestore.TableStoreClient" +} + +OPTIONS = { + "-P file" : "Specify workload file", + "-p key=value" : "Override workload property", + "-s" : "Print status to stderr", + "-target n" : "Target ops/sec (default: unthrottled)", + "-threads n" : "Number of client threads (default: 1)", + "-cp path" : "Additional Java classpath entries", + "-jvm-args args" : "Additional arguments to the JVM", +} + +def usage(): + output = io.StringIO() + print("%s command database [options]" % sys.argv[0], file=output) + + print("\nCommands:", file=output) + for command in sorted(COMMANDS.keys()): + print(" %s %s" % (command.ljust(14), + COMMANDS[command]["description"]), file=output) + + print("\nDatabases:", file=output) + for db in sorted(DATABASES.keys()): + print(" %s %s" % (db.ljust(14), BASE_URL + + db.split("-")[0]), file=output) + + print("\nOptions:", file=output) + for option in sorted(OPTIONS.keys()): + print(" %s %s" % (option.ljust(14), OPTIONS[option]), file=output) + + print("""\nWorkload Files: + There are various predefined workloads under workloads/ directory. + See https://github.com/brianfrankcooper/YCSB/wiki/Core-Properties + for the list of workload properties.""", file=output) + + return output.getvalue() + +# Python 2.6 doesn't have check_output. Add the method as it is in Python 2.7 +# Based on https://github.com/python/cpython/blob/2.7/Lib/subprocess.py#L545 +def check_output(*popenargs, **kwargs): + r"""Run command with arguments and return its output as a byte string. + + If the exit code was non-zero it raises a CalledProcessError. The + CalledProcessError object will have the return code in the returncode + attribute and output in the output attribute. 
+ + The arguments are the same as for the Popen constructor. Example: + + >>> check_output(["ls", "-l", "/dev/null"]) + 'crw-rw-rw- 1 root root 1, 3 Oct 18 2007 /dev/null\n' + + The stdout argument is not allowed as it is used internally. + To capture standard error in the result, use stderr=STDOUT. + + >>> check_output(["/bin/sh", "-c", + ... "ls -l non_existent_file ; exit 0"], + ... stderr=STDOUT) + 'ls: non_existent_file: No such file or directory\n' + """ + if 'stdout' in kwargs: + raise ValueError('stdout argument not allowed, it will be overridden.') + process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs) + output, unused_err = process.communicate() + retcode = process.poll() + if retcode: + cmd = kwargs.get("args") + if cmd is None: + cmd = popenargs[0] + error = subprocess.CalledProcessError(retcode, cmd) + error.output = output + raise error + return output + + +def debug(message): + print("[DEBUG] ", message, file=sys.stderr) + +def warn(message): + print("[WARN] ", message, file=sys.stderr) + +def error(message): + print("[ERROR] ", message, file=sys.stderr) + +def find_jars(dir, glob='*.jar'): + jars = [] + for (dirpath, dirnames, filenames) in os.walk(dir): + for filename in fnmatch.filter(filenames, glob): + jars.append(os.path.join(dirpath, filename)) + return jars + +def get_ycsb_home(): + dir = os.path.abspath(os.path.dirname(sys.argv[0])) + while "LICENSE.txt" not in os.listdir(dir): + dir = os.path.join(dir, os.path.pardir) + return os.path.abspath(dir) + +def is_distribution(): + # If there's a top level pom, we're a source checkout. otherwise a dist artifact + return "pom.xml" not in os.listdir(get_ycsb_home()) + +# Run the maven dependency plugin to get the local jar paths. 
+# presumes maven can run, so should only be run on source checkouts +# will invoke the 'package' goal for the given binding in order to resolve intra-project deps +# presumes maven properly handles system-specific path separators +# Given module is full module name eg. 'core' or 'couchbase-binding' +def get_classpath_from_maven(module): + try: + debug("Running 'mvn -pl site.ycsb:" + module + " -am package -DskipTests " + "dependency:build-classpath -DincludeScope=compile -Dmdep.outputFilterFile=true'") + mvn_output = subprocess.check_output(["mvn", "-pl", "site.ycsb:" + module, + "-am", "package", "-DskipTests", + "dependency:build-classpath", + "-DincludeScope=compile", + "-Dmdep.outputFilterFile=true"], universal_newlines=True) + line = [x for x in mvn_output.splitlines() if x.startswith("classpath=")][-1:] + return line[0][len("classpath="):] + except subprocess.CalledProcessError as err: + error("Attempting to generate a classpath from Maven failed " + f"with return code '{err.returncode}'. The output from " + "Maven follows, try running " + "'mvn -DskipTests package dependency:build=classpath' on your " + "own and correct errors." + os.linesep + os.linesep + "mvn output:" + os.linesep + + err.output) + sys.exit(err.returncode) + + +def main(): + p = argparse.ArgumentParser( + usage=usage(), + formatter_class=argparse.RawDescriptionHelpFormatter) + p.add_argument('-cp', dest='classpath', help="""Additional classpath + entries, e.g. '-cp /tmp/hbase-1.0.1.1/conf'. Will be + prepended to the YCSB classpath.""") + p.add_argument("-jvm-args", default=[], type=shlex.split, + help="""Additional arguments to pass to 'java', e.g. + '-Xmx4g'""") + p.add_argument("command", choices=sorted(COMMANDS), + help="""Command to run.""") + p.add_argument("database", choices=sorted(DATABASES), + help="""Database to test.""") + args, remaining = p.parse_known_args() + ycsb_home = get_ycsb_home() + + # Use JAVA_HOME to find java binary if set, otherwise just use PATH. 
+ java = "java" + java_home = os.getenv("JAVA_HOME") + if java_home: + java = os.path.join(java_home, "bin", "java") + db_classname = DATABASES[args.database] + command = COMMANDS[args.command]["command"] + main_classname = COMMANDS[args.command]["main"] + + # Classpath set up + binding = args.database.split("-")[0] + + if binding == "accumulo": + warn("The 'accumulo' client has been deprecated in favor of version " + "specific bindings. This name still maps to the binding for " + "Accumulo 1.6, which is named 'accumulo-1.6'. This alias will " + "be removed in a future YCSB release.") + binding = "accumulo1.6" + + if binding == "accumulo1.6": + warn("The 'accumulo1.6' client has been deprecated because Accumulo 1.6 " + "is EOM. If you are using Accumulo 1.7+ try using the 'accumulo1.7' " + "client instead.") + + if binding == "cassandra2": + warn("The 'cassandra2-cql' client has been deprecated. It has been " + "renamed to simply 'cassandra-cql'. This alias will be removed" + " in the next YCSB release.") + binding = "cassandra" + + if binding == "couchbase": + warn("The 'couchbase' client has been deprecated. If you are using " + "Couchbase 4.0+ try using the 'couchbase2' client instead.") + + if binding == "hbase098": + warn("The 'hbase098' client has been deprecated because HBase 0.98 " + "is EOM. If you are using HBase 1.2+ try using the 'hbase12' " + "client instead.") + + if binding == "hbase10": + warn("The 'hbase10' client has been deprecated because HBase 1.0 " + "is EOM. If you are using HBase 1.2+ try using the 'hbase12' " + "client instead.") + + if binding == "arangodb3": + warn("The 'arangodb3' client has been deprecated. The binding 'arangodb' " + "now covers every ArangoDB version. This alias will be removed " + "in the next YCSB release.") + binding = "arangodb" + + if is_distribution(): + db_dir = os.path.join(ycsb_home, binding + "-binding") + # include top-level conf for when we're a binding-specific artifact. 
+ # If we add top-level conf to the general artifact, starting here + # will allow binding-specific conf to override (because it's prepended) + cp = [os.path.join(ycsb_home, "conf")] + cp.extend(find_jars(os.path.join(ycsb_home, "lib"))) + cp.extend(find_jars(os.path.join(db_dir, "lib"))) + else: + warn("Running against a source checkout. In order to get our runtime " + "dependencies we'll have to invoke Maven. Depending on the state " + "of your system, this may take ~30-45 seconds") + db_location = "core" if (binding == "basic" or binding == "basicts") else binding + project = "core" if (binding == "basic" or binding == "basicts") else binding + "-binding" + db_dir = os.path.join(ycsb_home, db_location) + # goes first so we can rely on side-effect of package + maven_says = get_classpath_from_maven(project) + # TODO when we have a version property, skip the glob + cp = find_jars(os.path.join(db_dir, "target"), + project + "*.jar") + # alredy in jar:jar:jar form + cp.append(maven_says) + cp.insert(0, os.path.join(db_dir, "conf")) + classpath = os.pathsep.join(cp) + if args.classpath: + classpath = os.pathsep.join([args.classpath, classpath]) + + ycsb_command = ([java] + args.jvm_args + + ["-cp", classpath, + main_classname, "-db", db_classname] + remaining) + if command: + ycsb_command.append(command) + print(" ".join(ycsb_command), file=sys.stderr) + try: + return subprocess.call(ycsb_command) + except OSError as e: + if e.errno == errno.ENOENT: + error('Command failed. 
Is java installed and on your PATH?') + return 1 + else: + raise + +if __name__ == '__main__': + sys.exit(main()) +EOF + +# Make the ycsb script executable +chmod +x bin/ycsb + +# Import MongoDB public GPG Key +apt-get install gnupg curl +curl -fsSL https://pgp.mongodb.com/server-6.0.asc | \ + gpg -o /usr/share/keyrings/mongodb-server-6.0.gpg --dearmor + +# Create the list file /etc/apt/sources.list.d/mongodb-org-6.0.list +echo "deb [ arch=amd64,arm64 signed-by=/usr/share/keyrings/mongodb-server-6.0.gpg ] https://repo.mongodb.org/apt/ubuntu jammy/mongodb-org/6.0 multiverse" | + tee /etc/apt/sources.list.d/mongodb-org-6.0.list + +apt-get update +apt-get install -y mongodb-org + +systemctl start mongod +systemctl enable mongod + +# sudo systemctl status mongod + +# Run YCSB load and run mongodb commands +./bin/ycsb load mongodb -s -P workloads/workloada \ + -p recordcount=1000000 \ + -p mongodb.url=mongodb://localhost:27017/ycsb \ + -p mongodb.writeConcern=acknowledged + +# ./bin/ycsb run mongodb -s -P workloads/workloada \ +# -p operationcount=1000000 \ +# -p mongodb.url=mongodb://localhost:27017/ycsb \ +# -p readproportion=0.25 \ +# -p updateproportion=0.75 \ +# -p mongodb.writeConcern=acknowledged diff --git a/scripts/setup-benchmarks/install_ycsb.sh b/scripts/setup-benchmarks/install_ycsb.sh new file mode 100644 index 0000000..55782f1 --- /dev/null +++ b/scripts/setup-benchmarks/install_ycsb.sh @@ -0,0 +1,361 @@ +# Update and install required packages +sudo apt update + +# Download YCSB +curl -O --location https://github.com/brianfrankcooper/YCSB/releases/download/0.17.0/ycsb-0.17.0.tar.gz +tar xfvz ycsb-0.17.0.tar.gz +cd ycsb-0.17.0 + +# Set YCSB_HOME environment variable +export YCSB_HOME=$(pwd) +echo $YCSB_HOME + +# Replace the bin/ycsb script +# Replace the bin/ycsb script +cat << 'EOF' > bin/ycsb +#!/usr/bin/python3 +# +# Copyright (c) 2012 - 2015 YCSB contributors. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. +# + +import errno +import fnmatch +import io +import os +import shlex +import sys +import subprocess +import argparse + +BASE_URL = "https://github.com/brianfrankcooper/YCSB/tree/master/" +COMMANDS = { + "shell" : { + "command" : "", + "description" : "Interactive mode", + "main" : "site.ycsb.CommandLine", + }, + "load" : { + "command" : "-load", + "description" : "Execute the load phase", + "main" : "site.ycsb.Client", + }, + "run" : { + "command" : "-t", + "description" : "Execute the transaction phase", + "main" : "site.ycsb.Client", + }, +} + + +DATABASES = { + "accumulo" : "site.ycsb.db.accumulo.AccumuloClient", + "accumulo1.6" : "site.ycsb.db.accumulo.AccumuloClient", + "accumulo1.7" : "site.ycsb.db.accumulo.AccumuloClient", + "accumulo1.8" : "site.ycsb.db.accumulo.AccumuloClient", + "aerospike" : "site.ycsb.db.AerospikeClient", + "arangodb" : "site.ycsb.db.arangodb.ArangoDBClient", + "arangodb3" : "site.ycsb.db.arangodb.ArangoDBClient", + "asynchbase" : "site.ycsb.db.AsyncHBaseClient", + "azurecosmos" : "site.ycsb.db.AzureCosmosClient", + "azuretablestorage" : "site.ycsb.db.azuretablestorage.AzureClient", + "basic" : "site.ycsb.BasicDB", + "basicts" : "site.ycsb.BasicTSDB", + "cassandra-cql": "site.ycsb.db.CassandraCQLClient", + "cassandra2-cql": "site.ycsb.db.CassandraCQLClient", + "cloudspanner" : "site.ycsb.db.cloudspanner.CloudSpannerClient", + "couchbase" : 
"site.ycsb.db.CouchbaseClient", + "couchbase2" : "site.ycsb.db.couchbase2.Couchbase2Client", + "crail" : "site.ycsb.db.crail.CrailClient", + "dynamodb" : "site.ycsb.db.DynamoDBClient", + "elasticsearch": "site.ycsb.db.ElasticsearchClient", + "elasticsearch5": "site.ycsb.db.elasticsearch5.ElasticsearchClient", + "elasticsearch5-rest": "site.ycsb.db.elasticsearch5.ElasticsearchRestClient", + "foundationdb" : "site.ycsb.db.foundationdb.FoundationDBClient", + "geode" : "site.ycsb.db.GeodeClient", + "googlebigtable" : "site.ycsb.db.GoogleBigtableClient", + "googledatastore" : "site.ycsb.db.GoogleDatastoreClient", + "griddb" : "site.ycsb.db.griddb.GridDBClient", + "hbase098" : "site.ycsb.db.HBaseClient", + "hbase10" : "site.ycsb.db.HBaseClient10", + "hbase12" : "site.ycsb.db.hbase12.HBaseClient12", + "hbase14" : "site.ycsb.db.hbase14.HBaseClient14", + "hbase20" : "site.ycsb.db.hbase20.HBaseClient20", + "hypertable" : "site.ycsb.db.HypertableClient", + "ignite" : "site.ycsb.db.ignite.IgniteClient", + "ignite-sql" : "site.ycsb.db.ignite.IgniteSqlClient", + "infinispan-cs": "site.ycsb.db.InfinispanRemoteClient", + "infinispan" : "site.ycsb.db.InfinispanClient", + "jdbc" : "site.ycsb.db.JdbcDBClient", + "kudu" : "site.ycsb.db.KuduYCSBClient", + "memcached" : "site.ycsb.db.MemcachedClient", + "maprdb" : "site.ycsb.db.mapr.MapRDBClient", + "maprjsondb" : "site.ycsb.db.mapr.MapRJSONDBClient", + "mongodb" : "site.ycsb.db.MongoDbClient", + "mongodb-async": "site.ycsb.db.AsyncMongoDbClient", + "nosqldb" : "site.ycsb.db.NoSqlDbClient", + "orientdb" : "site.ycsb.db.OrientDBClient", + "postgrenosql" : "site.ycsb.postgrenosql.PostgreNoSQLDBClient", + "rados" : "site.ycsb.db.RadosClient", + "redis" : "site.ycsb.db.RedisClient", + "rest" : "site.ycsb.webservice.rest.RestClient", + "riak" : "site.ycsb.db.riak.RiakKVClient", + "rocksdb" : "site.ycsb.db.rocksdb.RocksDBClient", + "s3" : "site.ycsb.db.S3Client", + "solr" : "site.ycsb.db.solr.SolrClient", + "solr6" : 
"site.ycsb.db.solr6.SolrClient", + "tarantool" : "site.ycsb.db.TarantoolClient", + "tablestore" : "site.ycsb.db.tablestore.TableStoreClient" +} + +OPTIONS = { + "-P file" : "Specify workload file", + "-p key=value" : "Override workload property", + "-s" : "Print status to stderr", + "-target n" : "Target ops/sec (default: unthrottled)", + "-threads n" : "Number of client threads (default: 1)", + "-cp path" : "Additional Java classpath entries", + "-jvm-args args" : "Additional arguments to the JVM", +} + +def usage(): + output = io.StringIO() + print("%s command database [options]" % sys.argv[0], file=output) + + print("\nCommands:", file=output) + for command in sorted(COMMANDS.keys()): + print(" %s %s" % (command.ljust(14), + COMMANDS[command]["description"]), file=output) + + print("\nDatabases:", file=output) + for db in sorted(DATABASES.keys()): + print(" %s %s" % (db.ljust(14), BASE_URL + + db.split("-")[0]), file=output) + + print("\nOptions:", file=output) + for option in sorted(OPTIONS.keys()): + print(" %s %s" % (option.ljust(14), OPTIONS[option]), file=output) + + print("""\nWorkload Files: + There are various predefined workloads under workloads/ directory. + See https://github.com/brianfrankcooper/YCSB/wiki/Core-Properties + for the list of workload properties.""", file=output) + + return output.getvalue() + +# Python 2.6 doesn't have check_output. Add the method as it is in Python 2.7 +# Based on https://github.com/python/cpython/blob/2.7/Lib/subprocess.py#L545 +def check_output(*popenargs, **kwargs): + r"""Run command with arguments and return its output as a byte string. + + If the exit code was non-zero it raises a CalledProcessError. The + CalledProcessError object will have the return code in the returncode + attribute and output in the output attribute. + + The arguments are the same as for the Popen constructor. 
Example: + + >>> check_output(["ls", "-l", "/dev/null"]) + 'crw-rw-rw- 1 root root 1, 3 Oct 18 2007 /dev/null\n' + + The stdout argument is not allowed as it is used internally. + To capture standard error in the result, use stderr=STDOUT. + + >>> check_output(["/bin/sh", "-c", + ... "ls -l non_existent_file ; exit 0"], + ... stderr=STDOUT) + 'ls: non_existent_file: No such file or directory\n' + """ + if 'stdout' in kwargs: + raise ValueError('stdout argument not allowed, it will be overridden.') + process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs) + output, unused_err = process.communicate() + retcode = process.poll() + if retcode: + cmd = kwargs.get("args") + if cmd is None: + cmd = popenargs[0] + error = subprocess.CalledProcessError(retcode, cmd) + error.output = output + raise error + return output + + +def debug(message): + print("[DEBUG] ", message, file=sys.stderr) + +def warn(message): + print("[WARN] ", message, file=sys.stderr) + +def error(message): + print("[ERROR] ", message, file=sys.stderr) + +def find_jars(dir, glob='*.jar'): + jars = [] + for (dirpath, dirnames, filenames) in os.walk(dir): + for filename in fnmatch.filter(filenames, glob): + jars.append(os.path.join(dirpath, filename)) + return jars + +def get_ycsb_home(): + dir = os.path.abspath(os.path.dirname(sys.argv[0])) + while "LICENSE.txt" not in os.listdir(dir): + dir = os.path.join(dir, os.path.pardir) + return os.path.abspath(dir) + +def is_distribution(): + # If there's a top level pom, we're a source checkout. otherwise a dist artifact + return "pom.xml" not in os.listdir(get_ycsb_home()) + +# Run the maven dependency plugin to get the local jar paths. +# presumes maven can run, so should only be run on source checkouts +# will invoke the 'package' goal for the given binding in order to resolve intra-project deps +# presumes maven properly handles system-specific path separators +# Given module is full module name eg. 
'core' or 'couchbase-binding' +def get_classpath_from_maven(module): + try: + debug("Running 'mvn -pl site.ycsb:" + module + " -am package -DskipTests " + "dependency:build-classpath -DincludeScope=compile -Dmdep.outputFilterFile=true'") + mvn_output = subprocess.check_output(["mvn", "-pl", "site.ycsb:" + module, + "-am", "package", "-DskipTests", + "dependency:build-classpath", + "-DincludeScope=compile", + "-Dmdep.outputFilterFile=true"], universal_newlines=True) + line = [x for x in mvn_output.splitlines() if x.startswith("classpath=")][-1:] + return line[0][len("classpath="):] + except subprocess.CalledProcessError as err: + error("Attempting to generate a classpath from Maven failed " + f"with return code '{err.returncode}'. The output from " + "Maven follows, try running " + "'mvn -DskipTests package dependency:build=classpath' on your " + "own and correct errors." + os.linesep + os.linesep + "mvn output:" + os.linesep + + err.output) + sys.exit(err.returncode) + + +def main(): + p = argparse.ArgumentParser( + usage=usage(), + formatter_class=argparse.RawDescriptionHelpFormatter) + p.add_argument('-cp', dest='classpath', help="""Additional classpath + entries, e.g. '-cp /tmp/hbase-1.0.1.1/conf'. Will be + prepended to the YCSB classpath.""") + p.add_argument("-jvm-args", default=[], type=shlex.split, + help="""Additional arguments to pass to 'java', e.g. + '-Xmx4g'""") + p.add_argument("command", choices=sorted(COMMANDS), + help="""Command to run.""") + p.add_argument("database", choices=sorted(DATABASES), + help="""Database to test.""") + args, remaining = p.parse_known_args() + ycsb_home = get_ycsb_home() + + # Use JAVA_HOME to find java binary if set, otherwise just use PATH. 
+ java = "java" + java_home = os.getenv("JAVA_HOME") + if java_home: + java = os.path.join(java_home, "bin", "java") + db_classname = DATABASES[args.database] + command = COMMANDS[args.command]["command"] + main_classname = COMMANDS[args.command]["main"] + + # Classpath set up + binding = args.database.split("-")[0] + + if binding == "accumulo": + warn("The 'accumulo' client has been deprecated in favor of version " + "specific bindings. This name still maps to the binding for " + "Accumulo 1.6, which is named 'accumulo-1.6'. This alias will " + "be removed in a future YCSB release.") + binding = "accumulo1.6" + + if binding == "accumulo1.6": + warn("The 'accumulo1.6' client has been deprecated because Accumulo 1.6 " + "is EOM. If you are using Accumulo 1.7+ try using the 'accumulo1.7' " + "client instead.") + + if binding == "cassandra2": + warn("The 'cassandra2-cql' client has been deprecated. It has been " + "renamed to simply 'cassandra-cql'. This alias will be removed" + " in the next YCSB release.") + binding = "cassandra" + + if binding == "couchbase": + warn("The 'couchbase' client has been deprecated. If you are using " + "Couchbase 4.0+ try using the 'couchbase2' client instead.") + + if binding == "hbase098": + warn("The 'hbase098' client has been deprecated because HBase 0.98 " + "is EOM. If you are using HBase 1.2+ try using the 'hbase12' " + "client instead.") + + if binding == "hbase10": + warn("The 'hbase10' client has been deprecated because HBase 1.0 " + "is EOM. If you are using HBase 1.2+ try using the 'hbase12' " + "client instead.") + + if binding == "arangodb3": + warn("The 'arangodb3' client has been deprecated. The binding 'arangodb' " + "now covers every ArangoDB version. This alias will be removed " + "in the next YCSB release.") + binding = "arangodb" + + if is_distribution(): + db_dir = os.path.join(ycsb_home, binding + "-binding") + # include top-level conf for when we're a binding-specific artifact. 
+ # If we add top-level conf to the general artifact, starting here + # will allow binding-specific conf to override (because it's prepended) + cp = [os.path.join(ycsb_home, "conf")] + cp.extend(find_jars(os.path.join(ycsb_home, "lib"))) + cp.extend(find_jars(os.path.join(db_dir, "lib"))) + else: + warn("Running against a source checkout. In order to get our runtime " + "dependencies we'll have to invoke Maven. Depending on the state " + "of your system, this may take ~30-45 seconds") + db_location = "core" if (binding == "basic" or binding == "basicts") else binding + project = "core" if (binding == "basic" or binding == "basicts") else binding + "-binding" + db_dir = os.path.join(ycsb_home, db_location) + # goes first so we can rely on side-effect of package + maven_says = get_classpath_from_maven(project) + # TODO when we have a version property, skip the glob + cp = find_jars(os.path.join(db_dir, "target"), + project + "*.jar") + # alredy in jar:jar:jar form + cp.append(maven_says) + cp.insert(0, os.path.join(db_dir, "conf")) + classpath = os.pathsep.join(cp) + if args.classpath: + classpath = os.pathsep.join([args.classpath, classpath]) + + ycsb_command = ([java] + args.jvm_args + + ["-cp", classpath, + main_classname, "-db", db_classname] + remaining) + if command: + ycsb_command.append(command) + print(" ".join(ycsb_command), file=sys.stderr) + try: + return subprocess.call(ycsb_command) + except OSError as e: + if e.errno == errno.ENOENT: + error('Command failed. 
Is java installed and on your PATH?') + return 1 + else: + raise + +if __name__ == '__main__': + sys.exit(main()) +EOF + +# Make the ycsb script executable +chmod +x bin/ycsb From d3c851b78ec85351a55daadd979a1787ec39e212 Mon Sep 17 00:00:00 2001 From: Anish Palakurthi Date: Mon, 11 Nov 2024 22:28:37 +0000 Subject: [PATCH 3/5] runners: --- run_benchmarks.sh | 455 ++++++++++++++++++++ run_gapbs.sh | 182 ++++++++ run_redis.sh | 370 ++++++++++++++++ scripts/setup-benchmarks/install_mongodb.sh | 14 +- 4 files changed, 1018 insertions(+), 3 deletions(-) create mode 100755 run_benchmarks.sh create mode 100755 run_gapbs.sh create mode 100755 run_redis.sh diff --git a/run_benchmarks.sh b/run_benchmarks.sh new file mode 100755 index 0000000..df27cb8 --- /dev/null +++ b/run_benchmarks.sh @@ -0,0 +1,455 @@ +#!/bin/bash + +# Update and install required packages +sudo apt-get update +sudo apt-get install -y openjdk-11-jdk redis-server memcached netcat xz-utils mongodb-org curl gnupg + +# Download YCSB +curl -O --location https://github.com/brianfrankcooper/YCSB/releases/download/0.17.0/ycsb-0.17.0.tar.gz +tar xfvz ycsb-0.17.0.tar.gz +cd ycsb-0.17.0 + +cat << 'EOF' > bin/ycsb +#!/usr/bin/python3 +# +# Copyright (c) 2012 - 2015 YCSB contributors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. 
+# + +import errno +import fnmatch +import io +import os +import shlex +import sys +import subprocess +import argparse + +BASE_URL = "https://github.com/brianfrankcooper/YCSB/tree/master/" +COMMANDS = { + "shell" : { + "command" : "", + "description" : "Interactive mode", + "main" : "site.ycsb.CommandLine", + }, + "load" : { + "command" : "-load", + "description" : "Execute the load phase", + "main" : "site.ycsb.Client", + }, + "run" : { + "command" : "-t", + "description" : "Execute the transaction phase", + "main" : "site.ycsb.Client", + }, +} + + +DATABASES = { + "accumulo" : "site.ycsb.db.accumulo.AccumuloClient", + "accumulo1.6" : "site.ycsb.db.accumulo.AccumuloClient", + "accumulo1.7" : "site.ycsb.db.accumulo.AccumuloClient", + "accumulo1.8" : "site.ycsb.db.accumulo.AccumuloClient", + "aerospike" : "site.ycsb.db.AerospikeClient", + "arangodb" : "site.ycsb.db.arangodb.ArangoDBClient", + "arangodb3" : "site.ycsb.db.arangodb.ArangoDBClient", + "asynchbase" : "site.ycsb.db.AsyncHBaseClient", + "azurecosmos" : "site.ycsb.db.AzureCosmosClient", + "azuretablestorage" : "site.ycsb.db.azuretablestorage.AzureClient", + "basic" : "site.ycsb.BasicDB", + "basicts" : "site.ycsb.BasicTSDB", + "cassandra-cql": "site.ycsb.db.CassandraCQLClient", + "cassandra2-cql": "site.ycsb.db.CassandraCQLClient", + "cloudspanner" : "site.ycsb.db.cloudspanner.CloudSpannerClient", + "couchbase" : "site.ycsb.db.CouchbaseClient", + "couchbase2" : "site.ycsb.db.couchbase2.Couchbase2Client", + "crail" : "site.ycsb.db.crail.CrailClient", + "dynamodb" : "site.ycsb.db.DynamoDBClient", + "elasticsearch": "site.ycsb.db.ElasticsearchClient", + "elasticsearch5": "site.ycsb.db.elasticsearch5.ElasticsearchClient", + "elasticsearch5-rest": "site.ycsb.db.elasticsearch5.ElasticsearchRestClient", + "foundationdb" : "site.ycsb.db.foundationdb.FoundationDBClient", + "geode" : "site.ycsb.db.GeodeClient", + "googlebigtable" : "site.ycsb.db.GoogleBigtableClient", + "googledatastore" : 
"site.ycsb.db.GoogleDatastoreClient", + "griddb" : "site.ycsb.db.griddb.GridDBClient", + "hbase098" : "site.ycsb.db.HBaseClient", + "hbase10" : "site.ycsb.db.HBaseClient10", + "hbase12" : "site.ycsb.db.hbase12.HBaseClient12", + "hbase14" : "site.ycsb.db.hbase14.HBaseClient14", + "hbase20" : "site.ycsb.db.hbase20.HBaseClient20", + "hypertable" : "site.ycsb.db.HypertableClient", + "ignite" : "site.ycsb.db.ignite.IgniteClient", + "ignite-sql" : "site.ycsb.db.ignite.IgniteSqlClient", + "infinispan-cs": "site.ycsb.db.InfinispanRemoteClient", + "infinispan" : "site.ycsb.db.InfinispanClient", + "jdbc" : "site.ycsb.db.JdbcDBClient", + "kudu" : "site.ycsb.db.KuduYCSBClient", + "memcached" : "site.ycsb.db.MemcachedClient", + "maprdb" : "site.ycsb.db.mapr.MapRDBClient", + "maprjsondb" : "site.ycsb.db.mapr.MapRJSONDBClient", + "mongodb" : "site.ycsb.db.MongoDbClient", + "mongodb-async": "site.ycsb.db.AsyncMongoDbClient", + "nosqldb" : "site.ycsb.db.NoSqlDbClient", + "orientdb" : "site.ycsb.db.OrientDBClient", + "postgrenosql" : "site.ycsb.postgrenosql.PostgreNoSQLDBClient", + "rados" : "site.ycsb.db.RadosClient", + "redis" : "site.ycsb.db.RedisClient", + "rest" : "site.ycsb.webservice.rest.RestClient", + "riak" : "site.ycsb.db.riak.RiakKVClient", + "rocksdb" : "site.ycsb.db.rocksdb.RocksDBClient", + "s3" : "site.ycsb.db.S3Client", + "solr" : "site.ycsb.db.solr.SolrClient", + "solr6" : "site.ycsb.db.solr6.SolrClient", + "tarantool" : "site.ycsb.db.TarantoolClient", + "tablestore" : "site.ycsb.db.tablestore.TableStoreClient" +} + +OPTIONS = { + "-P file" : "Specify workload file", + "-p key=value" : "Override workload property", + "-s" : "Print status to stderr", + "-target n" : "Target ops/sec (default: unthrottled)", + "-threads n" : "Number of client threads (default: 1)", + "-cp path" : "Additional Java classpath entries", + "-jvm-args args" : "Additional arguments to the JVM", +} + +def usage(): + output = io.StringIO() + print("%s command database [options]" % sys.argv[0], 
file=output) + + print("\nCommands:", file=output) + for command in sorted(COMMANDS.keys()): + print(" %s %s" % (command.ljust(14), + COMMANDS[command]["description"]), file=output) + + print("\nDatabases:", file=output) + for db in sorted(DATABASES.keys()): + print(" %s %s" % (db.ljust(14), BASE_URL + + db.split("-")[0]), file=output) + + print("\nOptions:", file=output) + for option in sorted(OPTIONS.keys()): + print(" %s %s" % (option.ljust(14), OPTIONS[option]), file=output) + + print("""\nWorkload Files: + There are various predefined workloads under workloads/ directory. + See https://github.com/brianfrankcooper/YCSB/wiki/Core-Properties + for the list of workload properties.""", file=output) + + return output.getvalue() + +# Python 2.6 doesn't have check_output. Add the method as it is in Python 2.7 +# Based on https://github.com/python/cpython/blob/2.7/Lib/subprocess.py#L545 +def check_output(*popenargs, **kwargs): + r"""Run command with arguments and return its output as a byte string. + + If the exit code was non-zero it raises a CalledProcessError. The + CalledProcessError object will have the return code in the returncode + attribute and output in the output attribute. + + The arguments are the same as for the Popen constructor. Example: + + >>> check_output(["ls", "-l", "/dev/null"]) + 'crw-rw-rw- 1 root root 1, 3 Oct 18 2007 /dev/null\n' + + The stdout argument is not allowed as it is used internally. + To capture standard error in the result, use stderr=STDOUT. + + >>> check_output(["/bin/sh", "-c", + ... "ls -l non_existent_file ; exit 0"], + ... 
stderr=STDOUT) + 'ls: non_existent_file: No such file or directory\n' + """ + if 'stdout' in kwargs: + raise ValueError('stdout argument not allowed, it will be overridden.') + process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs) + output, unused_err = process.communicate() + retcode = process.poll() + if retcode: + cmd = kwargs.get("args") + if cmd is None: + cmd = popenargs[0] + error = subprocess.CalledProcessError(retcode, cmd) + error.output = output + raise error + return output + + +def debug(message): + print("[DEBUG] ", message, file=sys.stderr) + +def warn(message): + print("[WARN] ", message, file=sys.stderr) + +def error(message): + print("[ERROR] ", message, file=sys.stderr) + +def find_jars(dir, glob='*.jar'): + jars = [] + for (dirpath, dirnames, filenames) in os.walk(dir): + for filename in fnmatch.filter(filenames, glob): + jars.append(os.path.join(dirpath, filename)) + return jars + +def get_ycsb_home(): + dir = os.path.abspath(os.path.dirname(sys.argv[0])) + while "LICENSE.txt" not in os.listdir(dir): + dir = os.path.join(dir, os.path.pardir) + return os.path.abspath(dir) + +def is_distribution(): + # If there's a top level pom, we're a source checkout. otherwise a dist artifact + return "pom.xml" not in os.listdir(get_ycsb_home()) + +# Run the maven dependency plugin to get the local jar paths. +# presumes maven can run, so should only be run on source checkouts +# will invoke the 'package' goal for the given binding in order to resolve intra-project deps +# presumes maven properly handles system-specific path separators +# Given module is full module name eg. 
'core' or 'couchbase-binding' +def get_classpath_from_maven(module): + try: + debug("Running 'mvn -pl site.ycsb:" + module + " -am package -DskipTests " + "dependency:build-classpath -DincludeScope=compile -Dmdep.outputFilterFile=true'") + mvn_output = subprocess.check_output(["mvn", "-pl", "site.ycsb:" + module, + "-am", "package", "-DskipTests", + "dependency:build-classpath", + "-DincludeScope=compile", + "-Dmdep.outputFilterFile=true"], universal_newlines=True) + line = [x for x in mvn_output.splitlines() if x.startswith("classpath=")][-1:] + return line[0][len("classpath="):] + except subprocess.CalledProcessError as err: + error("Attempting to generate a classpath from Maven failed " + f"with return code '{err.returncode}'. The output from " + "Maven follows, try running " + "'mvn -DskipTests package dependency:build=classpath' on your " + "own and correct errors." + os.linesep + os.linesep + "mvn output:" + os.linesep + + err.output) + sys.exit(err.returncode) + + +def main(): + p = argparse.ArgumentParser( + usage=usage(), + formatter_class=argparse.RawDescriptionHelpFormatter) + p.add_argument('-cp', dest='classpath', help="""Additional classpath + entries, e.g. '-cp /tmp/hbase-1.0.1.1/conf'. Will be + prepended to the YCSB classpath.""") + p.add_argument("-jvm-args", default=[], type=shlex.split, + help="""Additional arguments to pass to 'java', e.g. + '-Xmx4g'""") + p.add_argument("command", choices=sorted(COMMANDS), + help="""Command to run.""") + p.add_argument("database", choices=sorted(DATABASES), + help="""Database to test.""") + args, remaining = p.parse_known_args() + ycsb_home = get_ycsb_home() + + # Use JAVA_HOME to find java binary if set, otherwise just use PATH. 
+ java = "java" + java_home = os.getenv("JAVA_HOME") + if java_home: + java = os.path.join(java_home, "bin", "java") + db_classname = DATABASES[args.database] + command = COMMANDS[args.command]["command"] + main_classname = COMMANDS[args.command]["main"] + + # Classpath set up + binding = args.database.split("-")[0] + + if binding == "accumulo": + warn("The 'accumulo' client has been deprecated in favor of version " + "specific bindings. This name still maps to the binding for " + "Accumulo 1.6, which is named 'accumulo-1.6'. This alias will " + "be removed in a future YCSB release.") + binding = "accumulo1.6" + + if binding == "accumulo1.6": + warn("The 'accumulo1.6' client has been deprecated because Accumulo 1.6 " + "is EOM. If you are using Accumulo 1.7+ try using the 'accumulo1.7' " + "client instead.") + + if binding == "cassandra2": + warn("The 'cassandra2-cql' client has been deprecated. It has been " + "renamed to simply 'cassandra-cql'. This alias will be removed" + " in the next YCSB release.") + binding = "cassandra" + + if binding == "couchbase": + warn("The 'couchbase' client has been deprecated. If you are using " + "Couchbase 4.0+ try using the 'couchbase2' client instead.") + + if binding == "hbase098": + warn("The 'hbase098' client has been deprecated because HBase 0.98 " + "is EOM. If you are using HBase 1.2+ try using the 'hbase12' " + "client instead.") + + if binding == "hbase10": + warn("The 'hbase10' client has been deprecated because HBase 1.0 " + "is EOM. If you are using HBase 1.2+ try using the 'hbase12' " + "client instead.") + + if binding == "arangodb3": + warn("The 'arangodb3' client has been deprecated. The binding 'arangodb' " + "now covers every ArangoDB version. This alias will be removed " + "in the next YCSB release.") + binding = "arangodb" + + if is_distribution(): + db_dir = os.path.join(ycsb_home, binding + "-binding") + # include top-level conf for when we're a binding-specific artifact. 
+ # If we add top-level conf to the general artifact, starting here + # will allow binding-specific conf to override (because it's prepended) + cp = [os.path.join(ycsb_home, "conf")] + cp.extend(find_jars(os.path.join(ycsb_home, "lib"))) + cp.extend(find_jars(os.path.join(db_dir, "lib"))) + else: + warn("Running against a source checkout. In order to get our runtime " + "dependencies we'll have to invoke Maven. Depending on the state " + "of your system, this may take ~30-45 seconds") + db_location = "core" if (binding == "basic" or binding == "basicts") else binding + project = "core" if (binding == "basic" or binding == "basicts") else binding + "-binding" + db_dir = os.path.join(ycsb_home, db_location) + # goes first so we can rely on side-effect of package + maven_says = get_classpath_from_maven(project) + # TODO when we have a version property, skip the glob + cp = find_jars(os.path.join(db_dir, "target"), + project + "*.jar") + # alredy in jar:jar:jar form + cp.append(maven_says) + cp.insert(0, os.path.join(db_dir, "conf")) + classpath = os.pathsep.join(cp) + if args.classpath: + classpath = os.pathsep.join([args.classpath, classpath]) + + ycsb_command = ([java] + args.jvm_args + + ["-cp", classpath, + main_classname, "-db", db_classname] + remaining) + if command: + ycsb_command.append(command) + print(" ".join(ycsb_command), file=sys.stderr) + try: + return subprocess.call(ycsb_command) + except OSError as e: + if e.errno == errno.ENOENT: + error('Command failed. Is java installed and on your PATH?') + return 1 + else: + raise + +if __name__ == '__main__': + sys.exit(main()) +EOF + +# Configure and start services +echo "Configuring services..." 
+ +# Configure memcached +sudo sh -c 'echo "MEMCACHED_MEMORY=2048" > /etc/default/memcached' +sudo sh -c 'echo "-c 1024" >> /etc/memcached.conf' +sudo sh -c 'echo "-t 4" >> /etc/memcached.conf' + +# Start and enable services +sudo systemctl start memcached +sudo systemctl enable memcached +sudo systemctl start redis-server +sudo systemctl enable redis-server +sudo systemctl start mongod +sudo systemctl enable mongod + +# Function to run compression benchmark +run_compression_benchmark() { + echo "Running XZ Compression Benchmark..." + + # Create test directory + mkdir -p compression_benchmark + cd compression_benchmark + + # Create 1GB test file + echo "Creating 1GB test file..." + dd if=/dev/urandom of=testfile bs=1M count=1024 + + # Run compression test + echo "Running compression test..." + time xz -9 -T $(nproc) testfile + + # Run decompression test + echo "Running decompression test..." + time xz -d -T $(nproc) testfile.xz + + cd .. + echo "Compression benchmark completed." +} + +# Function to run memcached benchmark +run_memcached_benchmark() { + echo "Running Memcached Benchmark..." + ./bin/ycsb load memcached -s -P workloads/workloada \ + -p recordcount=1000000 \ + -p memcached.hosts=localhost \ + -p memcached.port=11211 \ + -p memcached.shutdownTimeoutMillis=30000 \ + -p memcached.opTimeout=60000 + + ./bin/ycsb run memcached -s -P workloads/workloada \ + -p operationcount=1000000 \ + -p memcached.hosts=localhost \ + -p memcached.port=11211 \ + -p readproportion=0.99 \ + -p updateproportion=0.01 \ + -p memcached.shutdownTimeoutMillis=30000 \ + -p memcached.opTimeout=60000 \ + -p maxexecutiontime=600 +} + +# Function to run MongoDB benchmark +run_mongodb_benchmark() { + echo "Running MongoDB Benchmark..." 
+ ./bin/ycsb load mongodb -s -P workloads/workloada \ + -p recordcount=1000000 \ + -p mongodb.url=mongodb://localhost:27017/ycsb \ + -p mongodb.writeConcern=acknowledged + + ./bin/ycsb run mongodb -s -P workloads/workloada \ + -p operationcount=1000000 \ + -p mongodb.url=mongodb://localhost:27017/ycsb \ + -p readproportion=0.25 \ + -p updateproportion=0.75 \ + -p mongodb.writeConcern=acknowledged +} + +# Function to run Redis benchmark +run_redis_benchmark() { + echo "Running Redis Benchmark..." + ./bin/ycsb load redis -s -P workloads/workloada -p recordcount=1000000 + ./bin/ycsb run redis -s -P workloads/workloada \ + -p operationcount=1000000 \ + -p redis.host=localhost \ + -p redis.port=6379 \ + -p readproportion=0.5 \ + -p updateproportion=0.5 +} + +# Main benchmark execution +echo "Starting Combined Benchmark Suite..." + +# Run all benchmarks +run_compression_benchmark +run_memcached_benchmark +run_mongodb_benchmark +run_redis_benchmark + +echo "All benchmarks completed." \ No newline at end of file diff --git a/run_gapbs.sh b/run_gapbs.sh new file mode 100755 index 0000000..a5f6c9d --- /dev/null +++ b/run_gapbs.sh @@ -0,0 +1,182 @@ +#!/bin/bash + +# GAPBS Benchmark Runner +# This script automates the installation and running of the GAP Benchmark Suite + +set -e # Exit on any error + +# Configuration variables - modify these as needed +GRAPH_SIZES=(16 20 22) # 2^N vertices +NUM_ITERATIONS=16 # Number of iterations per benchmark +NUM_THREADS=8 # Number of OpenMP threads +LOG_FILE="gapbs_benchmark_$(date +%Y%m%d_%H%M%S).log" +INSTALL_DIR="$HOME/gapbs" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# Logger function +log() { + echo -e "${GREEN}[$(date '+%Y-%m-%d %H:%M:%S')] $1${NC}" | tee -a "$LOG_FILE" +} + +error() { + echo -e "${RED}[ERROR] $1${NC}" | tee -a "$LOG_FILE" + exit 1 +} + +warn() { + echo -e "${YELLOW}[WARNING] $1${NC}" | tee -a "$LOG_FILE" +} + +# Check system requirements 
+check_requirements() { + log "Checking system requirements..." + + # Check for C++ compiler + if ! command -v g++ &> /dev/null; then + error "g++ is required but not installed. Please install build-essential." + fi + + # Check for OpenMP + if ! echo "#include <omp.h>" | g++ -fopenmp -x c++ - -c -o /dev/null &> /dev/null; then + warn "OpenMP not available. Installing libomp-dev..." + if command -v apt-get &> /dev/null; then + sudo apt-get update && sudo apt-get install -y libomp-dev + elif command -v yum &> /dev/null; then + sudo yum install -y libomp-devel + else + error "Could not install OpenMP. Please install manually." + fi + fi + + # Check for git + if ! command -v git &> /dev/null; then + error "git is required but not installed." + fi + + log "All requirements satisfied." +} + +# Install GAPBS +install_gapbs() { + log "Installing GAPBS..." + + if [ -d "$INSTALL_DIR" ]; then + warn "Installation directory already exists. Removing..." + rm -rf "$INSTALL_DIR" + fi + + git clone https://github.com/sbeamer/gapbs.git "$INSTALL_DIR" || error "Failed to clone repository" + cd "$INSTALL_DIR" || error "Failed to enter installation directory" + + # Compile + log "Compiling GAPBS..." + make clean + make -j "$(nproc)" || error "Compilation failed" + + log "Installation completed successfully." +} + +# Set up environment +setup_environment() { + log "Setting up environment..." + + export OMP_NUM_THREADS=$NUM_THREADS + export OMP_SCHEDULE=static + export KMP_AFFINITY=compact,1 + + # Increase stack size + ulimit -s unlimited || warn "Failed to set unlimited stack size" + + log "Environment configured." +} + +# Run benchmarks +run_benchmarks() { + local benchmarks=("bfs" "pr" "cc" "bc" "sssp" "tc") + local graph_types=("rmat" "uniform") + + log "Starting benchmark suite..." + + for size in "${GRAPH_SIZES[@]}"; do + log "Testing graphs of size 2^$size vertices..." + + for type in "${graph_types[@]}"; do + log "Generating $type graph..." + + # Check if generator exists + if [ ! 
-f "./generator" ]; then + error "Generator executable not found. Please ensure it is compiled." + fi + + if [ "$type" == "rmat" ]; then + ./generator -g "$size" -s "$((size+4))" || error "Failed to generate RMAT graph" + else + ./generator -g "$size" -s "$((size+4))" -r || error "Failed to generate uniform random graph" + fi + + for bench in "${benchmarks[@]}"; do + log "Running $bench benchmark..." + + # Run benchmark with timing + TIMEFORMAT="%R" + runtime=$( { time ./"$bench" -g "$size" -n "$NUM_ITERATIONS" -v; } 2>&1 ) + + # Log results + echo "Benchmark: $bench, Graph: $type, Size: 2^$size, Runtime: ${runtime}s" >> "$LOG_FILE" + done + done + done +} + +# Generate report +generate_report() { + log "Generating benchmark report..." + + echo -e "\nBenchmark Summary Report" > "report_$(date +%Y%m%d_%H%M%S).txt" + echo "=========================" >> "report_$(date +%Y%m%d_%H%M%S).txt" + echo "System Information:" >> "report_$(date +%Y%m%d_%H%M%S).txt" + echo "CPU: $(lscpu | grep "Model name" | sed 's/Model name: *//')" >> "report_$(date +%Y%m%d_%H%M%S).txt" + echo "Memory: $(free -h | awk '/^Mem:/ {print $2}')" >> "report_$(date +%Y%m%d_%H%M%S).txt" + echo "Threads: $NUM_THREADS" >> "report_$(date +%Y%m%d_%H%M%S).txt" + echo -e "\nBenchmark Results:" >> "report_$(date +%Y%m%d_%H%M%S).txt" + grep "Benchmark:" "$LOG_FILE" >> "report_$(date +%Y%m%d_%H%M%S).txt" + + log "Report generated." +} + +# Cleanup function +cleanup() { + log "Cleaning up..." + cd "$INSTALL_DIR" || return + make clean + rm -f benchmark.graph* + log "Cleanup completed." +} + +# Main execution +main() { + echo "GAP Benchmark Suite Runner" + echo "=========================" + + check_requirements + install_gapbs + setup_environment + run_benchmarks + generate_report + cleanup + + log "Benchmark suite completed successfully!" 
+ log "Results are available in $LOG_FILE" + log "Detailed report available in report_$(date +%Y%m%d_%H%M%S).txt" +} + +# Trap Ctrl+C and call cleanup +trap cleanup INT + +# Run main function +main \ No newline at end of file diff --git a/run_redis.sh b/run_redis.sh new file mode 100755 index 0000000..dbaedf7 --- /dev/null +++ b/run_redis.sh @@ -0,0 +1,370 @@ +#!/bin/bash + +# Update and install required packages +sudo apt update +sudo apt install -y openjdk-11-jdk redis-server curl + +# Start Redis server +sudo systemctl start redis-server +sudo systemctl enable redis-server + +# Download YCSB +curl -O --location https://github.com/brianfrankcooper/YCSB/releases/download/0.17.0/ycsb-0.17.0.tar.gz +tar xfvz ycsb-0.17.0.tar.gz +cd ycsb-0.17.0 + +# Replace the bin/ycsb script +# Replace the bin/ycsb script +cat << 'EOF' > bin/ycsb +#!/usr/bin/python3 +# +# Copyright (c) 2012 - 2015 YCSB contributors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. 
+# + +import errno +import fnmatch +import io +import os +import shlex +import sys +import subprocess +import argparse + +BASE_URL = "https://github.com/brianfrankcooper/YCSB/tree/master/" +COMMANDS = { + "shell" : { + "command" : "", + "description" : "Interactive mode", + "main" : "site.ycsb.CommandLine", + }, + "load" : { + "command" : "-load", + "description" : "Execute the load phase", + "main" : "site.ycsb.Client", + }, + "run" : { + "command" : "-t", + "description" : "Execute the transaction phase", + "main" : "site.ycsb.Client", + }, +} + + +DATABASES = { + "accumulo" : "site.ycsb.db.accumulo.AccumuloClient", + "accumulo1.6" : "site.ycsb.db.accumulo.AccumuloClient", + "accumulo1.7" : "site.ycsb.db.accumulo.AccumuloClient", + "accumulo1.8" : "site.ycsb.db.accumulo.AccumuloClient", + "aerospike" : "site.ycsb.db.AerospikeClient", + "arangodb" : "site.ycsb.db.arangodb.ArangoDBClient", + "arangodb3" : "site.ycsb.db.arangodb.ArangoDBClient", + "asynchbase" : "site.ycsb.db.AsyncHBaseClient", + "azurecosmos" : "site.ycsb.db.AzureCosmosClient", + "azuretablestorage" : "site.ycsb.db.azuretablestorage.AzureClient", + "basic" : "site.ycsb.BasicDB", + "basicts" : "site.ycsb.BasicTSDB", + "cassandra-cql": "site.ycsb.db.CassandraCQLClient", + "cassandra2-cql": "site.ycsb.db.CassandraCQLClient", + "cloudspanner" : "site.ycsb.db.cloudspanner.CloudSpannerClient", + "couchbase" : "site.ycsb.db.CouchbaseClient", + "couchbase2" : "site.ycsb.db.couchbase2.Couchbase2Client", + "crail" : "site.ycsb.db.crail.CrailClient", + "dynamodb" : "site.ycsb.db.DynamoDBClient", + "elasticsearch": "site.ycsb.db.ElasticsearchClient", + "elasticsearch5": "site.ycsb.db.elasticsearch5.ElasticsearchClient", + "elasticsearch5-rest": "site.ycsb.db.elasticsearch5.ElasticsearchRestClient", + "foundationdb" : "site.ycsb.db.foundationdb.FoundationDBClient", + "geode" : "site.ycsb.db.GeodeClient", + "googlebigtable" : "site.ycsb.db.GoogleBigtableClient", + "googledatastore" : 
"site.ycsb.db.GoogleDatastoreClient", + "griddb" : "site.ycsb.db.griddb.GridDBClient", + "hbase098" : "site.ycsb.db.HBaseClient", + "hbase10" : "site.ycsb.db.HBaseClient10", + "hbase12" : "site.ycsb.db.hbase12.HBaseClient12", + "hbase14" : "site.ycsb.db.hbase14.HBaseClient14", + "hbase20" : "site.ycsb.db.hbase20.HBaseClient20", + "hypertable" : "site.ycsb.db.HypertableClient", + "ignite" : "site.ycsb.db.ignite.IgniteClient", + "ignite-sql" : "site.ycsb.db.ignite.IgniteSqlClient", + "infinispan-cs": "site.ycsb.db.InfinispanRemoteClient", + "infinispan" : "site.ycsb.db.InfinispanClient", + "jdbc" : "site.ycsb.db.JdbcDBClient", + "kudu" : "site.ycsb.db.KuduYCSBClient", + "memcached" : "site.ycsb.db.MemcachedClient", + "maprdb" : "site.ycsb.db.mapr.MapRDBClient", + "maprjsondb" : "site.ycsb.db.mapr.MapRJSONDBClient", + "mongodb" : "site.ycsb.db.MongoDbClient", + "mongodb-async": "site.ycsb.db.AsyncMongoDbClient", + "nosqldb" : "site.ycsb.db.NoSqlDbClient", + "orientdb" : "site.ycsb.db.OrientDBClient", + "postgrenosql" : "site.ycsb.postgrenosql.PostgreNoSQLDBClient", + "rados" : "site.ycsb.db.RadosClient", + "redis" : "site.ycsb.db.RedisClient", + "rest" : "site.ycsb.webservice.rest.RestClient", + "riak" : "site.ycsb.db.riak.RiakKVClient", + "rocksdb" : "site.ycsb.db.rocksdb.RocksDBClient", + "s3" : "site.ycsb.db.S3Client", + "solr" : "site.ycsb.db.solr.SolrClient", + "solr6" : "site.ycsb.db.solr6.SolrClient", + "tarantool" : "site.ycsb.db.TarantoolClient", + "tablestore" : "site.ycsb.db.tablestore.TableStoreClient" +} + +OPTIONS = { + "-P file" : "Specify workload file", + "-p key=value" : "Override workload property", + "-s" : "Print status to stderr", + "-target n" : "Target ops/sec (default: unthrottled)", + "-threads n" : "Number of client threads (default: 1)", + "-cp path" : "Additional Java classpath entries", + "-jvm-args args" : "Additional arguments to the JVM", +} + +def usage(): + output = io.StringIO() + print("%s command database [options]" % sys.argv[0], 
file=output) + + print("\nCommands:", file=output) + for command in sorted(COMMANDS.keys()): + print(" %s %s" % (command.ljust(14), + COMMANDS[command]["description"]), file=output) + + print("\nDatabases:", file=output) + for db in sorted(DATABASES.keys()): + print(" %s %s" % (db.ljust(14), BASE_URL + + db.split("-")[0]), file=output) + + print("\nOptions:", file=output) + for option in sorted(OPTIONS.keys()): + print(" %s %s" % (option.ljust(14), OPTIONS[option]), file=output) + + print("""\nWorkload Files: + There are various predefined workloads under workloads/ directory. + See https://github.com/brianfrankcooper/YCSB/wiki/Core-Properties + for the list of workload properties.""", file=output) + + return output.getvalue() + +# Python 2.6 doesn't have check_output. Add the method as it is in Python 2.7 +# Based on https://github.com/python/cpython/blob/2.7/Lib/subprocess.py#L545 +def check_output(*popenargs, **kwargs): + r"""Run command with arguments and return its output as a byte string. + + If the exit code was non-zero it raises a CalledProcessError. The + CalledProcessError object will have the return code in the returncode + attribute and output in the output attribute. + + The arguments are the same as for the Popen constructor. Example: + + >>> check_output(["ls", "-l", "/dev/null"]) + 'crw-rw-rw- 1 root root 1, 3 Oct 18 2007 /dev/null\n' + + The stdout argument is not allowed as it is used internally. + To capture standard error in the result, use stderr=STDOUT. + + >>> check_output(["/bin/sh", "-c", + ... "ls -l non_existent_file ; exit 0"], + ... 
stderr=STDOUT) + 'ls: non_existent_file: No such file or directory\n' + """ + if 'stdout' in kwargs: + raise ValueError('stdout argument not allowed, it will be overridden.') + process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs) + output, unused_err = process.communicate() + retcode = process.poll() + if retcode: + cmd = kwargs.get("args") + if cmd is None: + cmd = popenargs[0] + error = subprocess.CalledProcessError(retcode, cmd) + error.output = output + raise error + return output + + +def debug(message): + print("[DEBUG] ", message, file=sys.stderr) + +def warn(message): + print("[WARN] ", message, file=sys.stderr) + +def error(message): + print("[ERROR] ", message, file=sys.stderr) + +def find_jars(dir, glob='*.jar'): + jars = [] + for (dirpath, dirnames, filenames) in os.walk(dir): + for filename in fnmatch.filter(filenames, glob): + jars.append(os.path.join(dirpath, filename)) + return jars + +def get_ycsb_home(): + dir = os.path.abspath(os.path.dirname(sys.argv[0])) + while "LICENSE.txt" not in os.listdir(dir): + dir = os.path.join(dir, os.path.pardir) + return os.path.abspath(dir) + +def is_distribution(): + # If there's a top level pom, we're a source checkout. otherwise a dist artifact + return "pom.xml" not in os.listdir(get_ycsb_home()) + +# Run the maven dependency plugin to get the local jar paths. +# presumes maven can run, so should only be run on source checkouts +# will invoke the 'package' goal for the given binding in order to resolve intra-project deps +# presumes maven properly handles system-specific path separators +# Given module is full module name eg. 
'core' or 'couchbase-binding' +def get_classpath_from_maven(module): + try: + debug("Running 'mvn -pl site.ycsb:" + module + " -am package -DskipTests " + "dependency:build-classpath -DincludeScope=compile -Dmdep.outputFilterFile=true'") + mvn_output = subprocess.check_output(["mvn", "-pl", "site.ycsb:" + module, + "-am", "package", "-DskipTests", + "dependency:build-classpath", + "-DincludeScope=compile", + "-Dmdep.outputFilterFile=true"], universal_newlines=True) + line = [x for x in mvn_output.splitlines() if x.startswith("classpath=")][-1:] + return line[0][len("classpath="):] + except subprocess.CalledProcessError as err: + error("Attempting to generate a classpath from Maven failed " + f"with return code '{err.returncode}'. The output from " + "Maven follows, try running " + "'mvn -DskipTests package dependency:build=classpath' on your " + "own and correct errors." + os.linesep + os.linesep + "mvn output:" + os.linesep + + err.output) + sys.exit(err.returncode) + + +def main(): + p = argparse.ArgumentParser( + usage=usage(), + formatter_class=argparse.RawDescriptionHelpFormatter) + p.add_argument('-cp', dest='classpath', help="""Additional classpath + entries, e.g. '-cp /tmp/hbase-1.0.1.1/conf'. Will be + prepended to the YCSB classpath.""") + p.add_argument("-jvm-args", default=[], type=shlex.split, + help="""Additional arguments to pass to 'java', e.g. + '-Xmx4g'""") + p.add_argument("command", choices=sorted(COMMANDS), + help="""Command to run.""") + p.add_argument("database", choices=sorted(DATABASES), + help="""Database to test.""") + args, remaining = p.parse_known_args() + ycsb_home = get_ycsb_home() + + # Use JAVA_HOME to find java binary if set, otherwise just use PATH. 
+ java = "java" + java_home = os.getenv("JAVA_HOME") + if java_home: + java = os.path.join(java_home, "bin", "java") + db_classname = DATABASES[args.database] + command = COMMANDS[args.command]["command"] + main_classname = COMMANDS[args.command]["main"] + + # Classpath set up + binding = args.database.split("-")[0] + + if binding == "accumulo": + warn("The 'accumulo' client has been deprecated in favor of version " + "specific bindings. This name still maps to the binding for " + "Accumulo 1.6, which is named 'accumulo-1.6'. This alias will " + "be removed in a future YCSB release.") + binding = "accumulo1.6" + + if binding == "accumulo1.6": + warn("The 'accumulo1.6' client has been deprecated because Accumulo 1.6 " + "is EOM. If you are using Accumulo 1.7+ try using the 'accumulo1.7' " + "client instead.") + + if binding == "cassandra2": + warn("The 'cassandra2-cql' client has been deprecated. It has been " + "renamed to simply 'cassandra-cql'. This alias will be removed" + " in the next YCSB release.") + binding = "cassandra" + + if binding == "couchbase": + warn("The 'couchbase' client has been deprecated. If you are using " + "Couchbase 4.0+ try using the 'couchbase2' client instead.") + + if binding == "hbase098": + warn("The 'hbase098' client has been deprecated because HBase 0.98 " + "is EOM. If you are using HBase 1.2+ try using the 'hbase12' " + "client instead.") + + if binding == "hbase10": + warn("The 'hbase10' client has been deprecated because HBase 1.0 " + "is EOM. If you are using HBase 1.2+ try using the 'hbase12' " + "client instead.") + + if binding == "arangodb3": + warn("The 'arangodb3' client has been deprecated. The binding 'arangodb' " + "now covers every ArangoDB version. This alias will be removed " + "in the next YCSB release.") + binding = "arangodb" + + if is_distribution(): + db_dir = os.path.join(ycsb_home, binding + "-binding") + # include top-level conf for when we're a binding-specific artifact. 
+ # If we add top-level conf to the general artifact, starting here + # will allow binding-specific conf to override (because it's prepended) + cp = [os.path.join(ycsb_home, "conf")] + cp.extend(find_jars(os.path.join(ycsb_home, "lib"))) + cp.extend(find_jars(os.path.join(db_dir, "lib"))) + else: + warn("Running against a source checkout. In order to get our runtime " + "dependencies we'll have to invoke Maven. Depending on the state " + "of your system, this may take ~30-45 seconds") + db_location = "core" if (binding == "basic" or binding == "basicts") else binding + project = "core" if (binding == "basic" or binding == "basicts") else binding + "-binding" + db_dir = os.path.join(ycsb_home, db_location) + # goes first so we can rely on side-effect of package + maven_says = get_classpath_from_maven(project) + # TODO when we have a version property, skip the glob + cp = find_jars(os.path.join(db_dir, "target"), + project + "*.jar") + # alredy in jar:jar:jar form + cp.append(maven_says) + cp.insert(0, os.path.join(db_dir, "conf")) + classpath = os.pathsep.join(cp) + if args.classpath: + classpath = os.pathsep.join([args.classpath, classpath]) + + ycsb_command = ([java] + args.jvm_args + + ["-cp", classpath, + main_classname, "-db", db_classname] + remaining) + if command: + ycsb_command.append(command) + print(" ".join(ycsb_command), file=sys.stderr) + try: + return subprocess.call(ycsb_command) + except OSError as e: + if e.errno == errno.ENOENT: + error('Command failed. 
Is java installed and on your PATH?') + return 1 + else: + raise + +if __name__ == '__main__': + sys.exit(main()) +EOF + +# Make the ycsb script executable +chmod +x bin/ycsb + +# Run YCSB load and run redis commands +./bin/ycsb load redis -s -P workloads/workloada -p recordcount=1000000 +./bin/ycsb run redis -s -P workloads/workloada -p operationcount=1000000 -p redis.host=localhost -p redis.port=6379 -p readproportion=0.5 -p updateproportion=0.5 + + diff --git a/scripts/setup-benchmarks/install_mongodb.sh b/scripts/setup-benchmarks/install_mongodb.sh index b02eab2..6bff9ae 100644 --- a/scripts/setup-benchmarks/install_mongodb.sh +++ b/scripts/setup-benchmarks/install_mongodb.sh @@ -1,6 +1,6 @@ #!/bin/bash -# apt-get update && apt-get install -y openjdk-11-jdk && echo 'export PATH=$PATH:/usr/lib/jvm/java-11-openjdk-amd64/bin' >> ~/.bashrc && source ~/.bashrc +apt-get update && apt-get install -y openjdk-11-jdk && echo 'export PATH=$PATH:/usr/lib/jvm/java-11-openjdk-amd64/bin' >> ~/.bashrc && source ~/.bashrc # Update and install required packages apt update @@ -10,6 +10,8 @@ curl -O --location https://github.com/brianfrankcooper/YCSB/releases/download/0. 
tar xfvz ycsb-0.17.0.tar.gz cd ycsb-0.17.0 +pwd + export YCSB_HOME=$(pwd) @@ -375,8 +377,14 @@ echo "deb [ arch=amd64,arm64 signed-by=/usr/share/keyrings/mongodb-server-6.0.gp apt-get update apt-get install -y mongodb-org -systemctl start mongod -systemctl enable mongod + +mkdir -p /data/db + +# Start MongoDB server +mongod --dbpath /data/db --fork --logpath /var/log/mongodb.log + +# systemctl start mongod +# systemctl enable mongod # sudo systemctl status mongod From e394c95080d36bf214f1507ae203efe371adefe8 Mon Sep 17 00:00:00 2001 From: Anish Palakurthi Date: Mon, 11 Nov 2024 22:57:28 +0000 Subject: [PATCH 4/5] modified export --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index 165c0e5..2386f9d 100644 --- a/Makefile +++ b/Makefile @@ -193,6 +193,9 @@ docker: install-ycsb: @echo "Installing ycsb..." @source scripts/setup-benchmarks/install_ycsb.sh + export YCSB_HOME=/KernMLOps/ycsb-0.17.0 + +export YCSB_HOME=/KernMLOps/ycsb-0.17.0 # Must be run before using any mongodb benchmark (once) install-mongodb: From 18341e7e7bd4ecca4cace6c5c9e3d66cc2461014 Mon Sep 17 00:00:00 2001 From: Anish Palakurthi Date: Mon, 18 Nov 2024 22:13:29 +0000 Subject: [PATCH 5/5] addressed pr comments --- .../kernmlops/kernmlops_benchmark/__init__.py | 2 +- .../kernmlops/kernmlops_benchmark/mongodb.py | 26 +- run_gapbs.sh | 182 -------- .../run_benchmarks/run_memcached.sh | 130 ++---- scripts/run_benchmarks/run_mongodb.sh | 390 ++++++++++++++++++ .../run_benchmarks/run_redis.sh | 6 +- scripts/run_benchmarks/run_xz.sh | 12 + scripts/setup-benchmarks/install_mongodb.sh | 21 +- scripts/setup-benchmarks/install_ycsb.sh | 4 +- 9 files changed, 461 insertions(+), 312 deletions(-) delete mode 100755 run_gapbs.sh rename run_benchmarks.sh => scripts/run_benchmarks/run_memcached.sh (83%) create mode 100755 scripts/run_benchmarks/run_mongodb.sh rename run_redis.sh => scripts/run_benchmarks/run_redis.sh (99%) create mode 100755 scripts/run_benchmarks/run_xz.sh 
diff --git a/python/kernmlops/kernmlops_benchmark/__init__.py b/python/kernmlops/kernmlops_benchmark/__init__.py index 058cb4a..13bbf14 100644 --- a/python/kernmlops/kernmlops_benchmark/__init__.py +++ b/python/kernmlops/kernmlops_benchmark/__init__.py @@ -13,8 +13,8 @@ BenchmarkRunningError, ) from kernmlops_benchmark.gap import GapBenchmark -from kernmlops_benchmark.mongodb import MongoDbBenchmark from kernmlops_benchmark.linux_build import LinuxBuildBenchmark +from kernmlops_benchmark.mongodb import MongoDbBenchmark from kernmlops_config import ConfigBase benchmarks: Mapping[str, type[Benchmark]] = { diff --git a/python/kernmlops/kernmlops_benchmark/mongodb.py b/python/kernmlops/kernmlops_benchmark/mongodb.py index e7e91ad..fcdcecf 100644 --- a/python/kernmlops/kernmlops_benchmark/mongodb.py +++ b/python/kernmlops/kernmlops_benchmark/mongodb.py @@ -1,6 +1,6 @@ import subprocess from dataclasses import dataclass -from typing import Literal, cast +from typing import cast from data_schema import GraphEngine, demote from kernmlops_benchmark.benchmark import Benchmark, GenericBenchmarkConfig @@ -13,11 +13,10 @@ @dataclass(frozen=True) -class BenchmarkConfig(ConfigBase): - - recordCount: int = 1000000 +class MongoDbConfig(ConfigBase): + record_count: int = 1000000 readProportion: float = 0.25 - updateProportion: float =0.75 + updateProportion: float = 0.75 class MongoDbBenchmark(Benchmark): @@ -28,15 +27,15 @@ def name(cls) -> str: @classmethod def default_config(cls) -> ConfigBase: - return BenchmarkConfig() + return MongoDbConfig() @classmethod def from_config(cls, config: ConfigBase) -> "Benchmark": generic_config = cast(GenericBenchmarkConfig, getattr(config, "generic")) - gap_config = cast(BenchmarkConfig, getattr(config, cls.name())) - return MongoDbBenchmark(generic_config=generic_config, config=gap_config) + mongodb_config = cast(MongoDbConfig, getattr(config, cls.name())) + return MongoDbBenchmark(generic_config=generic_config, config=mongodb_config) - def 
__init__(self, *, generic_config: GenericBenchmarkConfig, config: BenchmarkConfig): + def __init__(self, *, generic_config: GenericBenchmarkConfig, config: MongoDbConfig): self.generic_config = generic_config self.config = config self.benchmark_dir = self.generic_config.get_benchmark_dir() / self.name() @@ -56,13 +55,14 @@ def run(self) -> None: if self.process is not None: raise BenchmarkRunningError() - bash_file_path = self.benchmark_dir / "run_mongodb.sh" # Add the path to your bash file here - print(bash_file_path) + bash_file_path = "../scripts/run_benchmarks/run_mongodb.sh" self.process = subprocess.Popen( [ "bash", - bash_file_path, - + str(bash_file_path), + str(self.config.record_count), + str(self.config.readProportion), + str(self.config.updateProportion) ], preexec_fn=demote(), stdout=subprocess.DEVNULL, diff --git a/run_gapbs.sh b/run_gapbs.sh deleted file mode 100755 index a5f6c9d..0000000 --- a/run_gapbs.sh +++ /dev/null @@ -1,182 +0,0 @@ -#!/bin/bash - -# GAPBS Benchmark Runner -# This script automates the installation and running of the GAP Benchmark Suite - -set -e # Exit on any error - -# Configuration variables - modify these as needed -GRAPH_SIZES=(16 20 22) # 2^N vertices -NUM_ITERATIONS=16 # Number of iterations per benchmark -NUM_THREADS=8 # Number of OpenMP threads -LOG_FILE="gapbs_benchmark_$(date +%Y%m%d_%H%M%S).log" -INSTALL_DIR="$HOME/gapbs" - -# Colors for output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -NC='\033[0m' # No Color - -# Logger function -log() { - echo -e "${GREEN}[$(date '+%Y-%m-%d %H:%M:%S')] $1${NC}" | tee -a "$LOG_FILE" -} - -error() { - echo -e "${RED}[ERROR] $1${NC}" | tee -a "$LOG_FILE" - exit 1 -} - -warn() { - echo -e "${YELLOW}[WARNING] $1${NC}" | tee -a "$LOG_FILE" -} - -# Check system requirements -check_requirements() { - log "Checking system requirements..." - - # Check for C++ compiler - if ! command -v g++ &> /dev/null; then - error "g++ is required but not installed. 
Please install build-essential." - fi - - # Check for OpenMP - if ! echo "#include " | g++ -fopenmp -x c++ - -c -o /dev/null &> /dev/null; then - warn "OpenMP not available. Installing libomp-dev..." - if command -v apt-get &> /dev/null; then - sudo apt-get update && sudo apt-get install -y libomp-dev - elif command -v yum &> /dev/null; then - sudo yum install -y libomp-devel - else - error "Could not install OpenMP. Please install manually." - fi - fi - - # Check for git - if ! command -v git &> /dev/null; then - error "git is required but not installed." - fi - - log "All requirements satisfied." -} - -# Install GAPBS -install_gapbs() { - log "Installing GAPBS..." - - if [ -d "$INSTALL_DIR" ]; then - warn "Installation directory already exists. Removing..." - rm -rf "$INSTALL_DIR" - fi - - git clone https://github.com/sbeamer/gapbs.git "$INSTALL_DIR" || error "Failed to clone repository" - cd "$INSTALL_DIR" || error "Failed to enter installation directory" - - # Compile - log "Compiling GAPBS..." - make clean - make -j "$(nproc)" || error "Compilation failed" - - log "Installation completed successfully." -} - -# Set up environment -setup_environment() { - log "Setting up environment..." - - export OMP_NUM_THREADS=$NUM_THREADS - export OMP_SCHEDULE=static - export KMP_AFFINITY=compact,1 - - # Increase stack size - ulimit -s unlimited || warn "Failed to set unlimited stack size" - - log "Environment configured." -} - -# Run benchmarks -run_benchmarks() { - local benchmarks=("bfs" "pr" "cc" "bc" "sssp" "tc") - local graph_types=("rmat" "uniform") - - log "Starting benchmark suite..." - - for size in "${GRAPH_SIZES[@]}"; do - log "Testing graphs of size 2^$size vertices..." - - for type in "${graph_types[@]}"; do - log "Generating $type graph..." - - # Check if generator exists - if [ ! -f "./generator" ]; then - error "Generator executable not found. Please ensure it is compiled." 
- fi - - if [ "$type" == "rmat" ]; then - ./generator -g "$size" -s "$((size+4))" || error "Failed to generate RMAT graph" - else - ./generator -g "$size" -s "$((size+4))" -r || error "Failed to generate uniform random graph" - fi - - for bench in "${benchmarks[@]}"; do - log "Running $bench benchmark..." - - # Run benchmark with timing - TIMEFORMAT="%R" - runtime=$( { time ./"$bench" -g "$size" -n "$NUM_ITERATIONS" -v; } 2>&1 ) - - # Log results - echo "Benchmark: $bench, Graph: $type, Size: 2^$size, Runtime: ${runtime}s" >> "$LOG_FILE" - done - done - done -} - -# Generate report -generate_report() { - log "Generating benchmark report..." - - echo -e "\nBenchmark Summary Report" > "report_$(date +%Y%m%d_%H%M%S).txt" - echo "=========================" >> "report_$(date +%Y%m%d_%H%M%S).txt" - echo "System Information:" >> "report_$(date +%Y%m%d_%H%M%S).txt" - echo "CPU: $(lscpu | grep "Model name" | sed 's/Model name: *//')" >> "report_$(date +%Y%m%d_%H%M%S).txt" - echo "Memory: $(free -h | awk '/^Mem:/ {print $2}')" >> "report_$(date +%Y%m%d_%H%M%S).txt" - echo "Threads: $NUM_THREADS" >> "report_$(date +%Y%m%d_%H%M%S).txt" - echo -e "\nBenchmark Results:" >> "report_$(date +%Y%m%d_%H%M%S).txt" - grep "Benchmark:" "$LOG_FILE" >> "report_$(date +%Y%m%d_%H%M%S).txt" - - log "Report generated." -} - -# Cleanup function -cleanup() { - log "Cleaning up..." - cd "$INSTALL_DIR" || return - make clean - rm -f benchmark.graph* - log "Cleanup completed." -} - -# Main execution -main() { - echo "GAP Benchmark Suite Runner" - echo "=========================" - - check_requirements - install_gapbs - setup_environment - run_benchmarks - generate_report - cleanup - - log "Benchmark suite completed successfully!" 
- log "Results are available in $LOG_FILE" - log "Detailed report available in report_$(date +%Y%m%d_%H%M%S).txt" -} - -# Trap Ctrl+C and call cleanup -trap cleanup INT - -# Run main function -main \ No newline at end of file diff --git a/run_benchmarks.sh b/scripts/run_benchmarks/run_memcached.sh similarity index 83% rename from run_benchmarks.sh rename to scripts/run_benchmarks/run_memcached.sh index df27cb8..7c5dce4 100755 --- a/run_benchmarks.sh +++ b/scripts/run_benchmarks/run_memcached.sh @@ -1,15 +1,14 @@ #!/bin/bash # Update and install required packages -sudo apt-get update -sudo apt-get install -y openjdk-11-jdk redis-server memcached netcat xz-utils mongodb-org curl gnupg +sudo apt update # Download YCSB curl -O --location https://github.com/brianfrankcooper/YCSB/releases/download/0.17.0/ycsb-0.17.0.tar.gz tar xfvz ycsb-0.17.0.tar.gz cd ycsb-0.17.0 -cat << 'EOF' > bin/ycsb +cat <<'EOF' >bin/ycsb #!/usr/bin/python3 # # Copyright (c) 2012 - 2015 YCSB contributors. All rights reserved. @@ -328,7 +327,7 @@ def main(): # TODO when we have a version property, skip the glob cp = find_jars(os.path.join(db_dir, "target"), project + "*.jar") - # alredy in jar:jar:jar form + # already in jar:jar:jar form cp.append(maven_says) cp.insert(0, os.path.join(db_dir, "conf")) classpath = os.pathsep.join(cp) @@ -354,102 +353,43 @@ if __name__ == '__main__': sys.exit(main()) EOF -# Configure and start services -echo "Configuring services..." 
+# Install and configure memcached +sudo apt-get update +sudo apt-get install -y memcached netcat # Configure memcached sudo sh -c 'echo "MEMCACHED_MEMORY=2048" > /etc/default/memcached' sudo sh -c 'echo "-c 1024" >> /etc/memcached.conf' sudo sh -c 'echo "-t 4" >> /etc/memcached.conf' -# Start and enable services -sudo systemctl start memcached +# Restart memcached with new configuration +sudo systemctl restart memcached sudo systemctl enable memcached -sudo systemctl start redis-server -sudo systemctl enable redis-server -sudo systemctl start mongod -sudo systemctl enable mongod - -# Function to run compression benchmark -run_compression_benchmark() { - echo "Running XZ Compression Benchmark..." - - # Create test directory - mkdir -p compression_benchmark - cd compression_benchmark - - # Create 1GB test file - echo "Creating 1GB test file..." - dd if=/dev/urandom of=testfile bs=1M count=1024 - - # Run compression test - echo "Running compression test..." - time xz -9 -T $(nproc) testfile - - # Run decompression test - echo "Running decompression test..." - time xz -d -T $(nproc) testfile.xz - - cd .. - echo "Compression benchmark completed." -} - -# Function to run memcached benchmark -run_memcached_benchmark() { - echo "Running Memcached Benchmark..." - ./bin/ycsb load memcached -s -P workloads/workloada \ - -p recordcount=1000000 \ - -p memcached.hosts=localhost \ - -p memcached.port=11211 \ - -p memcached.shutdownTimeoutMillis=30000 \ - -p memcached.opTimeout=60000 - - ./bin/ycsb run memcached -s -P workloads/workloada \ - -p operationcount=1000000 \ - -p memcached.hosts=localhost \ - -p memcached.port=11211 \ - -p readproportion=0.99 \ - -p updateproportion=0.01 \ - -p memcached.shutdownTimeoutMillis=30000 \ - -p memcached.opTimeout=60000 \ - -p maxexecutiontime=600 -} - -# Function to run MongoDB benchmark -run_mongodb_benchmark() { - echo "Running MongoDB Benchmark..." 
- ./bin/ycsb load mongodb -s -P workloads/workloada \ - -p recordcount=1000000 \ - -p mongodb.url=mongodb://localhost:27017/ycsb \ - -p mongodb.writeConcern=acknowledged - - ./bin/ycsb run mongodb -s -P workloads/workloada \ - -p operationcount=1000000 \ - -p mongodb.url=mongodb://localhost:27017/ycsb \ - -p readproportion=0.25 \ - -p updateproportion=0.75 \ - -p mongodb.writeConcern=acknowledged -} - -# Function to run Redis benchmark -run_redis_benchmark() { - echo "Running Redis Benchmark..." - ./bin/ycsb load redis -s -P workloads/workloada -p recordcount=1000000 - ./bin/ycsb run redis -s -P workloads/workloada \ - -p operationcount=1000000 \ - -p redis.host=localhost \ - -p redis.port=6379 \ - -p readproportion=0.5 \ - -p updateproportion=0.5 -} - -# Main benchmark execution -echo "Starting Combined Benchmark Suite..." - -# Run all benchmarks -run_compression_benchmark -run_memcached_benchmark -run_mongodb_benchmark -run_redis_benchmark -echo "All benchmarks completed." \ No newline at end of file +# Verify memcached status +# echo 'stats' | nc localhost 11211 +# echo 'version' | nc localhost 11211 + +# # Check memory status +# free -m +# cat /proc/meminfo | grep Mem +# echo "stats" | nc localhost 11211 | grep bytes + +# Run YCSB load and run memcached commands +./bin/ycsb load memcached -s -P workloads/workloada \ + -p recordcount=1000000 \ + -p memcached.hosts=localhost \ + -p memcached.port=11211 \ + -p memcached.shutdownTimeoutMillis=30000 \ + -p memcached.opTimeout=60000 + +./bin/ycsb run memcached -s -P workloads/workloada \ + -p operationcount=1000000 \ + -p memcached.hosts=localhost \ + -p memcached.port=11211 \ + -p readproportion=0.99 \ + -p updateproportion=0.01 \ + -p memcached.shutdownTimeoutMillis=30000 \ + -p memcached.opTimeout=60000 \ + -p maxexecutiontime=600 +# -threads 8 diff --git a/scripts/run_benchmarks/run_mongodb.sh b/scripts/run_benchmarks/run_mongodb.sh new file mode 100755 index 0000000..0bcb8dc --- /dev/null +++ 
b/scripts/run_benchmarks/run_mongodb.sh @@ -0,0 +1,390 @@ +#!/bin/bash + +# Update and install required packages +sudo apt update + +# Download YCSB +curl -O --location https://github.com/brianfrankcooper/YCSB/releases/download/0.17.0/ycsb-0.17.0.tar.gz +tar xfvz ycsb-0.17.0.tar.gz +cd ycsb-0.17.0 + +export YCSB_HOME=$(pwd) + +# Replace the bin/ycsb script +# Replace the bin/ycsb script +cat <<'EOF' >bin/ycsb +#!/usr/bin/python3 +# +# Copyright (c) 2012 - 2015 YCSB contributors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you +# may not use this file except in compliance with the License. You +# may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. See accompanying +# LICENSE file. 
+# + +import errno +import fnmatch +import io +import os +import shlex +import sys +import subprocess +import argparse + +BASE_URL = "https://github.com/brianfrankcooper/YCSB/tree/master/" +COMMANDS = { + "shell" : { + "command" : "", + "description" : "Interactive mode", + "main" : "site.ycsb.CommandLine", + }, + "load" : { + "command" : "-load", + "description" : "Execute the load phase", + "main" : "site.ycsb.Client", + }, + "run" : { + "command" : "-t", + "description" : "Execute the transaction phase", + "main" : "site.ycsb.Client", + }, +} + + +DATABASES = { + "accumulo" : "site.ycsb.db.accumulo.AccumuloClient", + "accumulo1.6" : "site.ycsb.db.accumulo.AccumuloClient", + "accumulo1.7" : "site.ycsb.db.accumulo.AccumuloClient", + "accumulo1.8" : "site.ycsb.db.accumulo.AccumuloClient", + "aerospike" : "site.ycsb.db.AerospikeClient", + "arangodb" : "site.ycsb.db.arangodb.ArangoDBClient", + "arangodb3" : "site.ycsb.db.arangodb.ArangoDBClient", + "asynchbase" : "site.ycsb.db.AsyncHBaseClient", + "azurecosmos" : "site.ycsb.db.AzureCosmosClient", + "azuretablestorage" : "site.ycsb.db.azuretablestorage.AzureClient", + "basic" : "site.ycsb.BasicDB", + "basicts" : "site.ycsb.BasicTSDB", + "cassandra-cql": "site.ycsb.db.CassandraCQLClient", + "cassandra2-cql": "site.ycsb.db.CassandraCQLClient", + "cloudspanner" : "site.ycsb.db.cloudspanner.CloudSpannerClient", + "couchbase" : "site.ycsb.db.CouchbaseClient", + "couchbase2" : "site.ycsb.db.couchbase2.Couchbase2Client", + "crail" : "site.ycsb.db.crail.CrailClient", + "dynamodb" : "site.ycsb.db.DynamoDBClient", + "elasticsearch": "site.ycsb.db.ElasticsearchClient", + "elasticsearch5": "site.ycsb.db.elasticsearch5.ElasticsearchClient", + "elasticsearch5-rest": "site.ycsb.db.elasticsearch5.ElasticsearchRestClient", + "foundationdb" : "site.ycsb.db.foundationdb.FoundationDBClient", + "geode" : "site.ycsb.db.GeodeClient", + "googlebigtable" : "site.ycsb.db.GoogleBigtableClient", + "googledatastore" : 
"site.ycsb.db.GoogleDatastoreClient", + "griddb" : "site.ycsb.db.griddb.GridDBClient", + "hbase098" : "site.ycsb.db.HBaseClient", + "hbase10" : "site.ycsb.db.HBaseClient10", + "hbase12" : "site.ycsb.db.hbase12.HBaseClient12", + "hbase14" : "site.ycsb.db.hbase14.HBaseClient14", + "hbase20" : "site.ycsb.db.hbase20.HBaseClient20", + "hypertable" : "site.ycsb.db.HypertableClient", + "ignite" : "site.ycsb.db.ignite.IgniteClient", + "ignite-sql" : "site.ycsb.db.ignite.IgniteSqlClient", + "infinispan-cs": "site.ycsb.db.InfinispanRemoteClient", + "infinispan" : "site.ycsb.db.InfinispanClient", + "jdbc" : "site.ycsb.db.JdbcDBClient", + "kudu" : "site.ycsb.db.KuduYCSBClient", + "memcached" : "site.ycsb.db.MemcachedClient", + "maprdb" : "site.ycsb.db.mapr.MapRDBClient", + "maprjsondb" : "site.ycsb.db.mapr.MapRJSONDBClient", + "mongodb" : "site.ycsb.db.MongoDbClient", + "mongodb-async": "site.ycsb.db.AsyncMongoDbClient", + "nosqldb" : "site.ycsb.db.NoSqlDbClient", + "orientdb" : "site.ycsb.db.OrientDBClient", + "postgrenosql" : "site.ycsb.postgrenosql.PostgreNoSQLDBClient", + "rados" : "site.ycsb.db.RadosClient", + "redis" : "site.ycsb.db.RedisClient", + "rest" : "site.ycsb.webservice.rest.RestClient", + "riak" : "site.ycsb.db.riak.RiakKVClient", + "rocksdb" : "site.ycsb.db.rocksdb.RocksDBClient", + "s3" : "site.ycsb.db.S3Client", + "solr" : "site.ycsb.db.solr.SolrClient", + "solr6" : "site.ycsb.db.solr6.SolrClient", + "tarantool" : "site.ycsb.db.TarantoolClient", + "tablestore" : "site.ycsb.db.tablestore.TableStoreClient" +} + +OPTIONS = { + "-P file" : "Specify workload file", + "-p key=value" : "Override workload property", + "-s" : "Print status to stderr", + "-target n" : "Target ops/sec (default: unthrottled)", + "-threads n" : "Number of client threads (default: 1)", + "-cp path" : "Additional Java classpath entries", + "-jvm-args args" : "Additional arguments to the JVM", +} + +def usage(): + output = io.StringIO() + print("%s command database [options]" % sys.argv[0], 
file=output) + + print("\nCommands:", file=output) + for command in sorted(COMMANDS.keys()): + print(" %s %s" % (command.ljust(14), + COMMANDS[command]["description"]), file=output) + + print("\nDatabases:", file=output) + for db in sorted(DATABASES.keys()): + print(" %s %s" % (db.ljust(14), BASE_URL + + db.split("-")[0]), file=output) + + print("\nOptions:", file=output) + for option in sorted(OPTIONS.keys()): + print(" %s %s" % (option.ljust(14), OPTIONS[option]), file=output) + + print("""\nWorkload Files: + There are various predefined workloads under workloads/ directory. + See https://github.com/brianfrankcooper/YCSB/wiki/Core-Properties + for the list of workload properties.""", file=output) + + return output.getvalue() + +# Python 2.6 doesn't have check_output. Add the method as it is in Python 2.7 +# Based on https://github.com/python/cpython/blob/2.7/Lib/subprocess.py#L545 +def check_output(*popenargs, **kwargs): + r"""Run command with arguments and return its output as a byte string. + + If the exit code was non-zero it raises a CalledProcessError. The + CalledProcessError object will have the return code in the returncode + attribute and output in the output attribute. + + The arguments are the same as for the Popen constructor. Example: + + >>> check_output(["ls", "-l", "/dev/null"]) + 'crw-rw-rw- 1 root root 1, 3 Oct 18 2007 /dev/null\n' + + The stdout argument is not allowed as it is used internally. + To capture standard error in the result, use stderr=STDOUT. + + >>> check_output(["/bin/sh", "-c", + ... "ls -l non_existent_file ; exit 0"], + ... 
stderr=STDOUT) + 'ls: non_existent_file: No such file or directory\n' + """ + if 'stdout' in kwargs: + raise ValueError('stdout argument not allowed, it will be overridden.') + process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs) + output, unused_err = process.communicate() + retcode = process.poll() + if retcode: + cmd = kwargs.get("args") + if cmd is None: + cmd = popenargs[0] + error = subprocess.CalledProcessError(retcode, cmd) + error.output = output + raise error + return output + + +def debug(message): + print("[DEBUG] ", message, file=sys.stderr) + +def warn(message): + print("[WARN] ", message, file=sys.stderr) + +def error(message): + print("[ERROR] ", message, file=sys.stderr) + +def find_jars(dir, glob='*.jar'): + jars = [] + for (dirpath, dirnames, filenames) in os.walk(dir): + for filename in fnmatch.filter(filenames, glob): + jars.append(os.path.join(dirpath, filename)) + return jars + +def get_ycsb_home(): + dir = os.path.abspath(os.path.dirname(sys.argv[0])) + while "LICENSE.txt" not in os.listdir(dir): + dir = os.path.join(dir, os.path.pardir) + return os.path.abspath(dir) + +def is_distribution(): + # If there's a top level pom, we're a source checkout. otherwise a dist artifact + return "pom.xml" not in os.listdir(get_ycsb_home()) + +# Run the maven dependency plugin to get the local jar paths. +# presumes maven can run, so should only be run on source checkouts +# will invoke the 'package' goal for the given binding in order to resolve intra-project deps +# presumes maven properly handles system-specific path separators +# Given module is full module name eg. 
'core' or 'couchbase-binding' +def get_classpath_from_maven(module): + try: + debug("Running 'mvn -pl site.ycsb:" + module + " -am package -DskipTests " + "dependency:build-classpath -DincludeScope=compile -Dmdep.outputFilterFile=true'") + mvn_output = subprocess.check_output(["mvn", "-pl", "site.ycsb:" + module, + "-am", "package", "-DskipTests", + "dependency:build-classpath", + "-DincludeScope=compile", + "-Dmdep.outputFilterFile=true"], universal_newlines=True) + line = [x for x in mvn_output.splitlines() if x.startswith("classpath=")][-1:] + return line[0][len("classpath="):] + except subprocess.CalledProcessError as err: + error("Attempting to generate a classpath from Maven failed " + f"with return code '{err.returncode}'. The output from " + "Maven follows, try running " + "'mvn -DskipTests package dependency:build=classpath' on your " + "own and correct errors." + os.linesep + os.linesep + "mvn output:" + os.linesep + + err.output) + sys.exit(err.returncode) + + +def main(): + p = argparse.ArgumentParser( + usage=usage(), + formatter_class=argparse.RawDescriptionHelpFormatter) + p.add_argument('-cp', dest='classpath', help="""Additional classpath + entries, e.g. '-cp /tmp/hbase-1.0.1.1/conf'. Will be + prepended to the YCSB classpath.""") + p.add_argument("-jvm-args", default=[], type=shlex.split, + help="""Additional arguments to pass to 'java', e.g. + '-Xmx4g'""") + p.add_argument("command", choices=sorted(COMMANDS), + help="""Command to run.""") + p.add_argument("database", choices=sorted(DATABASES), + help="""Database to test.""") + args, remaining = p.parse_known_args() + ycsb_home = get_ycsb_home() + + # Use JAVA_HOME to find java binary if set, otherwise just use PATH. 
+ java = "java" + java_home = os.getenv("JAVA_HOME") + if java_home: + java = os.path.join(java_home, "bin", "java") + db_classname = DATABASES[args.database] + command = COMMANDS[args.command]["command"] + main_classname = COMMANDS[args.command]["main"] + + # Classpath set up + binding = args.database.split("-")[0] + + if binding == "accumulo": + warn("The 'accumulo' client has been deprecated in favor of version " + "specific bindings. This name still maps to the binding for " + "Accumulo 1.6, which is named 'accumulo-1.6'. This alias will " + "be removed in a future YCSB release.") + binding = "accumulo1.6" + + if binding == "accumulo1.6": + warn("The 'accumulo1.6' client has been deprecated because Accumulo 1.6 " + "is EOM. If you are using Accumulo 1.7+ try using the 'accumulo1.7' " + "client instead.") + + if binding == "cassandra2": + warn("The 'cassandra2-cql' client has been deprecated. It has been " + "renamed to simply 'cassandra-cql'. This alias will be removed" + " in the next YCSB release.") + binding = "cassandra" + + if binding == "couchbase": + warn("The 'couchbase' client has been deprecated. If you are using " + "Couchbase 4.0+ try using the 'couchbase2' client instead.") + + if binding == "hbase098": + warn("The 'hbase098' client has been deprecated because HBase 0.98 " + "is EOM. If you are using HBase 1.2+ try using the 'hbase12' " + "client instead.") + + if binding == "hbase10": + warn("The 'hbase10' client has been deprecated because HBase 1.0 " + "is EOM. If you are using HBase 1.2+ try using the 'hbase12' " + "client instead.") + + if binding == "arangodb3": + warn("The 'arangodb3' client has been deprecated. The binding 'arangodb' " + "now covers every ArangoDB version. This alias will be removed " + "in the next YCSB release.") + binding = "arangodb" + + if is_distribution(): + db_dir = os.path.join(ycsb_home, binding + "-binding") + # include top-level conf for when we're a binding-specific artifact. 
+ # If we add top-level conf to the general artifact, starting here + # will allow binding-specific conf to override (because it's prepended) + cp = [os.path.join(ycsb_home, "conf")] + cp.extend(find_jars(os.path.join(ycsb_home, "lib"))) + cp.extend(find_jars(os.path.join(db_dir, "lib"))) + else: + warn("Running against a source checkout. In order to get our runtime " + "dependencies we'll have to invoke Maven. Depending on the state " + "of your system, this may take ~30-45 seconds") + db_location = "core" if (binding == "basic" or binding == "basicts") else binding + project = "core" if (binding == "basic" or binding == "basicts") else binding + "-binding" + db_dir = os.path.join(ycsb_home, db_location) + # goes first so we can rely on side-effect of package + maven_says = get_classpath_from_maven(project) + # TODO when we have a version property, skip the glob + cp = find_jars(os.path.join(db_dir, "target"), + project + "*.jar") + # already in jar:jar:jar form + cp.append(maven_says) + cp.insert(0, os.path.join(db_dir, "conf")) + classpath = os.pathsep.join(cp) + if args.classpath: + classpath = os.pathsep.join([args.classpath, classpath]) + + ycsb_command = ([java] + args.jvm_args + + ["-cp", classpath, + main_classname, "-db", db_classname] + remaining) + if command: + ycsb_command.append(command) + print(" ".join(ycsb_command), file=sys.stderr) + try: + return subprocess.call(ycsb_command) + except OSError as e: + if e.errno == errno.ENOENT: + error('Command failed. 
Is java installed and on your PATH?') + return 1 + else: + raise + +if __name__ == '__main__': + sys.exit(main()) +EOF + +# Make the ycsb script executable +chmod +x bin/ycsb + +# Import MongoDB public GPG Key +sudo apt-get install gnupg curl +curl -fsSL https://pgp.mongodb.com/server-6.0.asc | + sudo gpg -o /usr/share/keyrings/mongodb-server-6.0.gpg --dearmor + +# Create the list file /etc/apt/sources.list.d/mongodb-org-6.0.list +echo "deb [ arch=amd64,arm64 signed-by=/usr/share/keyrings/mongodb-server-6.0.gpg ] https://repo.mongodb.org/apt/ubuntu jammy/mongodb-org/6.0 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-6.0.list + +sudo apt-get update +sudo apt-get install -y mongodb-org + +sudo systemctl start mongod +sudo systemctl enable mongod + +# sudo systemctl status mongod + +# Run YCSB load and run mongodb commands +./bin/ycsb load mongodb -s -P workloads/workloada \ + -p recordcount=1000000 \ + -p mongodb.url=mongodb://localhost:27017/ycsb \ + -p mongodb.writeConcern=acknowledged + +./bin/ycsb run mongodb -s -P workloads/workloada \ + -p operationcount=1000000 \ + -p mongodb.url=mongodb://localhost:27017/ycsb \ + -p readproportion=0.25 \ + -p updateproportion=0.75 \ + -p mongodb.writeConcern=acknowledged diff --git a/run_redis.sh b/scripts/run_benchmarks/run_redis.sh similarity index 99% rename from run_redis.sh rename to scripts/run_benchmarks/run_redis.sh index dbaedf7..a370e76 100755 --- a/run_redis.sh +++ b/scripts/run_benchmarks/run_redis.sh @@ -15,7 +15,7 @@ cd ycsb-0.17.0 # Replace the bin/ycsb script # Replace the bin/ycsb script -cat << 'EOF' > bin/ycsb +cat <<'EOF' >bin/ycsb #!/usr/bin/python3 # # Copyright (c) 2012 - 2015 YCSB contributors. All rights reserved. 
@@ -334,7 +334,7 @@ def main(): # TODO when we have a version property, skip the glob cp = find_jars(os.path.join(db_dir, "target"), project + "*.jar") - # alredy in jar:jar:jar form + # already in jar:jar:jar form cp.append(maven_says) cp.insert(0, os.path.join(db_dir, "conf")) classpath = os.pathsep.join(cp) @@ -366,5 +366,3 @@ chmod +x bin/ycsb # Run YCSB load and run redis commands ./bin/ycsb load redis -s -P workloads/workloada -p recordcount=1000000 ./bin/ycsb run redis -s -P workloads/workloada -p operationcount=1000000 -p redis.host=localhost -p redis.port=6379 -p readproportion=0.5 -p updateproportion=0.5 - - diff --git a/scripts/run_benchmarks/run_xz.sh b/scripts/run_benchmarks/run_xz.sh new file mode 100755 index 0000000..c43cea1 --- /dev/null +++ b/scripts/run_benchmarks/run_xz.sh @@ -0,0 +1,12 @@ +sudo apt-get update +sudo apt-get install -y xz-utils + +dd if=/dev/urandom of=testfile bs=1M count=1024 # Creates 1GB file + +# Run compression benchmark +echo "Running compression test..." +time xz -9 -T $(nproc) testfile + +# Run decompression benchmark +echo "Running decompression test..." +time xz -d -T $(nproc) testfile.xz diff --git a/scripts/setup-benchmarks/install_mongodb.sh b/scripts/setup-benchmarks/install_mongodb.sh index 6bff9ae..0890c36 100644 --- a/scripts/setup-benchmarks/install_mongodb.sh +++ b/scripts/setup-benchmarks/install_mongodb.sh @@ -1,6 +1,6 @@ #!/bin/bash -apt-get update && apt-get install -y openjdk-11-jdk && echo 'export PATH=$PATH:/usr/lib/jvm/java-11-openjdk-amd64/bin' >> ~/.bashrc && source ~/.bashrc +apt-get update && apt-get install -y openjdk-11-jdk && echo 'export PATH=$PATH:/usr/lib/jvm/java-11-openjdk-amd64/bin' >>~/.bashrc && source ~/.bashrc # Update and install required packages apt update @@ -14,10 +14,9 @@ pwd export YCSB_HOME=$(pwd) - # Replace the bin/ycsb script # Replace the bin/ycsb script -cat << 'EOF' > bin/ycsb +cat <<'EOF' >bin/ycsb #!/usr/bin/python3 # # Copyright (c) 2012 - 2015 YCSB contributors. 
All rights reserved. @@ -336,7 +335,7 @@ def main(): # TODO when we have a version property, skip the glob cp = find_jars(os.path.join(db_dir, "target"), project + "*.jar") - # alredy in jar:jar:jar form + # already in jar:jar:jar form cp.append(maven_says) cp.insert(0, os.path.join(db_dir, "conf")) classpath = os.pathsep.join(cp) @@ -367,17 +366,16 @@ chmod +x bin/ycsb # Import MongoDB public GPG Key apt-get install gnupg curl -curl -fsSL https://pgp.mongodb.com/server-6.0.asc | \ - gpg -o /usr/share/keyrings/mongodb-server-6.0.gpg --dearmor +curl -fsSL https://pgp.mongodb.com/server-6.0.asc | + gpg -o /usr/share/keyrings/mongodb-server-6.0.gpg --dearmor # Create the list file /etc/apt/sources.list.d/mongodb-org-6.0.list echo "deb [ arch=amd64,arm64 signed-by=/usr/share/keyrings/mongodb-server-6.0.gpg ] https://repo.mongodb.org/apt/ubuntu jammy/mongodb-org/6.0 multiverse" | - tee /etc/apt/sources.list.d/mongodb-org-6.0.list + tee /etc/apt/sources.list.d/mongodb-org-6.0.list apt-get update apt-get install -y mongodb-org - mkdir -p /data/db # Start MongoDB server @@ -393,10 +391,3 @@ mongod --dbpath /data/db --fork --logpath /var/log/mongodb.log -p recordcount=1000000 \ -p mongodb.url=mongodb://localhost:27017/ycsb \ -p mongodb.writeConcern=acknowledged - -# ./bin/ycsb run mongodb -s -P workloads/workloada \ -# -p operationcount=1000000 \ -# -p mongodb.url=mongodb://localhost:27017/ycsb \ -# -p readproportion=0.25 \ -# -p updateproportion=0.75 \ -# -p mongodb.writeConcern=acknowledged diff --git a/scripts/setup-benchmarks/install_ycsb.sh b/scripts/setup-benchmarks/install_ycsb.sh index 55782f1..419f4a0 100644 --- a/scripts/setup-benchmarks/install_ycsb.sh +++ b/scripts/setup-benchmarks/install_ycsb.sh @@ -12,7 +12,7 @@ echo $YCSB_HOME # Replace the bin/ycsb script # Replace the bin/ycsb script -cat << 'EOF' > bin/ycsb +cat <<'EOF' >bin/ycsb #!/usr/bin/python3 # # Copyright (c) 2012 - 2015 YCSB contributors. All rights reserved. 
@@ -331,7 +331,7 @@ def main(): # TODO when we have a version property, skip the glob cp = find_jars(os.path.join(db_dir, "target"), project + "*.jar") - # alredy in jar:jar:jar form + # already in jar:jar:jar form cp.append(maven_says) cp.insert(0, os.path.join(db_dir, "conf")) classpath = os.pathsep.join(cp)