utcs-scea · anish-palakurthi · Nov 1, 2024 · Nov 7, 2024 · Nov 11, 2024 · Nov 11, 2024
diff --git a/Makefile b/Makefile
@@ -106,6 +106,11 @@ benchmark-gap:
 	-c ${KERNMLOPS_CONFIG_FILE} \
 	--benchmark gap
 
+benchmark-mongodb: # Runs `kernmlops collect` with the mongodb benchmark selected
+	@python python/kernmlops collect -v \
+	-c ${KERNMLOPS_CONFIG_FILE} \
+	--benchmark mongodb
+
 benchmark-linux-build:
 	@python python/kernmlops collect -v \
 	-c ${KERNMLOPS_CONFIG_FILE} \
@@ -182,6 +187,28 @@ docker:
 	${IMAGE_NAME}:${VERSION} \
 	${CONTAINER_CMD} || true
 
+# Benchmark Installation commands
+
+# Must be run before using any ycsb benchmark (once)
+install-ycsb:
+	@echo "Installing ycsb..."
+	@source scripts/setup-benchmarks/install_ycsb.sh
+
+# Exported at Makefile scope: each recipe line runs in its own shell, so an export inside a recipe is lost.
+export YCSB_HOME=/KernMLOps/ycsb-0.17.0
+
+# One-time setup: sources scripts/setup-benchmarks/install_mongodb.sh. Must be run once before using any mongodb benchmark.
+install-mongodb:
+	@echo "Installing mongodb benchmark..."
+	@source scripts/setup-benchmarks/install_mongodb.sh
+
+# Must be run before using the MongoDB benchmark each time YCSB target benchmark is changed
+load-mongodb:
+	@echo "Running YCSB load..."
+	${YCSB_HOME}/bin/ycsb load mongodb -s -P workloads/workloada \
+		-p recordcount=1000000 \
+		-p mongodb.url=mongodb://localhost:27017/ycsb \
+		-p mongodb.writeConcern=acknowledged
 
 # Miscellaneous commands
 clean-docker-images:

diff --git a/python/kernmlops/kernmlops_benchmark/__init__.py b/python/kernmlops/kernmlops_benchmark/__init__.py
@@ -14,12 +14,14 @@
 )
 from kernmlops_benchmark.gap import GapBenchmark
 from kernmlops_benchmark.linux_build import LinuxBuildBenchmark
+from kernmlops_benchmark.mongodb import MongoDbBenchmark
 from kernmlops_config import ConfigBase
 
 benchmarks: Mapping[str, type[Benchmark]] = {
     FauxBenchmark.name(): FauxBenchmark,
     LinuxBuildBenchmark.name(): LinuxBuildBenchmark,
     GapBenchmark.name(): GapBenchmark,
+    MongoDbBenchmark.name(): MongoDbBenchmark,  # keyed by "mongodb"
 }
 
 BenchmarkConfig = make_dataclass(

diff --git a/python/kernmlops/kernmlops_benchmark/gap.py b/python/kernmlops/kernmlops_benchmark/gap.py
@@ -51,6 +51,8 @@ def setup(self) -> None:
     def run(self) -> None:
         if self.process is not None:
             raise BenchmarkRunningError()
+
+        print(str(self.benchmark_dir / self.config.gap_benchmark))
         self.process = subprocess.Popen(
             [
                 str(self.benchmark_dir / self.config.gap_benchmark),

diff --git a/python/kernmlops/kernmlops_benchmark/mongodb.py b/python/kernmlops/kernmlops_benchmark/mongodb.py
@@ -0,0 +1,133 @@
+import subprocess
+from dataclasses import dataclass
+from typing import cast
+
+from data_schema import GraphEngine, demote
+from kernmlops_benchmark.benchmark import Benchmark, GenericBenchmarkConfig
+from kernmlops_benchmark.errors import (
+  BenchmarkNotInCollectionData,
+  BenchmarkNotRunningError,
+  BenchmarkRunningError,
+)
+from kernmlops_config import ConfigBase
+
+
+@dataclass(frozen=True)
+class MongoDbConfig(ConfigBase):
+    """Parameters forwarded to the MongoDB benchmark script as arguments."""
+
+    # First script argument.
+    record_count: int = 1000000
+    # Second script argument.
+    readProportion: float = 0.25
+    # Third script argument.
+    updateProportion: float = 0.75
+
+
+class MongoDbBenchmark(Benchmark):
+    """Benchmark that runs the MongoDB workload script in a subprocess."""
+
+    @classmethod
+    def name(cls) -> str:
+        """Return the benchmark identifier, also used as its config attribute name."""
+        return "mongodb"
+
+    @classmethod
+    def default_config(cls) -> ConfigBase:
+        """Return a MongoDbConfig holding the default values."""
+        return MongoDbConfig()
+
+    @classmethod
+    def from_config(cls, config: ConfigBase) -> "Benchmark":
+        """Build the benchmark from the `generic` and per-benchmark config attributes."""
+        generic_config = cast(GenericBenchmarkConfig, getattr(config, "generic"))
+        mongodb_config = cast(MongoDbConfig, getattr(config, cls.name()))
+        return MongoDbBenchmark(generic_config=generic_config, config=mongodb_config)
+
+    def __init__(self, *, generic_config: GenericBenchmarkConfig, config: MongoDbConfig):
+        """Store the configs; no process exists until run() is called."""
+        self.generic_config = generic_config
+        self.config = config
+        self.benchmark_dir = self.generic_config.get_benchmark_dir() / self.name()
+        self.process: subprocess.Popen | None = None
+
+    def is_configured(self) -> bool:
+        """Report the benchmark as configured unconditionally."""
+        # TODO: verify the installation (e.g. self.benchmark_dir.is_dir())
+        # instead of assuming it; that check was unreachable dead code before.
+        return True
+
+    def setup(self) -> None:
+        """Run the generic setup step.
+
+        Raises:
+            BenchmarkRunningError: if a process has already been started.
+        """
+        if self.process is not None:
+            raise BenchmarkRunningError()
+        self.generic_config.generic_setup()
+
+    def run(self) -> None:
+        """Launch the benchmark script as a child process.
+
+        Raises:
+            BenchmarkRunningError: if a process has already been started.
+        """
+        if self.process is not None:
+            raise BenchmarkRunningError()
+
+        # NOTE(review): this relative path is resolved against the working
+        # directory of the calling process -- confirm what that is at runtime.
+        bash_file_path = "../scripts/run_benchmarks/run_mongodb.sh"
+        self.process = subprocess.Popen(
+            [
+                "bash",
+                bash_file_path,
+                str(self.config.record_count),
+                str(self.config.readProportion),
+                str(self.config.updateProportion),
+            ],
+            preexec_fn=demote(),
+            stdout=subprocess.DEVNULL,
+        )
+
+    def poll(self) -> int | None:
+        """Return the process exit code, or None if it has not exited yet.
+
+        Raises:
+            BenchmarkNotRunningError: if run() has not started a process.
+        """
+        if self.process is None:
+            raise BenchmarkNotRunningError()
+        return self.process.poll()
+
+    def wait(self) -> None:
+        """Block until the benchmark process exits.
+
+        Raises:
+            BenchmarkNotRunningError: if run() has not started a process.
+        """
+        if self.process is None:
+            raise BenchmarkNotRunningError()
+        self.process.wait()
+
+    def kill(self) -> None:
+        """Request termination of the benchmark process via Popen.terminate().
+
+        Raises:
+            BenchmarkNotRunningError: if run() has not started a process.
+        """
+        if self.process is None:
+            raise BenchmarkNotRunningError()
+        self.process.terminate()
+
+    @classmethod
+    def plot_events(cls, graph_engine: GraphEngine) -> None:
+        """Validate that the collection data belongs to this benchmark.
+
+        Raises:
+            BenchmarkNotInCollectionData: if the collected data names another benchmark.
+        """
+        if graph_engine.collection_data.benchmark != cls.name():
+            raise BenchmarkNotInCollectionData()
+        # TODO(Patrick): plot when a trial starts/ends