Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added benchmarking capabilities for the mongodb benchmark #23

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,11 @@ benchmark-gap:
-c ${KERNMLOPS_CONFIG_FILE} \
--benchmark gap

# Invoke `kernmlops collect` verbosely with the mongodb benchmark selected,
# using the configuration file named by KERNMLOPS_CONFIG_FILE.
benchmark-mongodb:
@python python/kernmlops collect -v \
-c ${KERNMLOPS_CONFIG_FILE} \
--benchmark mongodb

benchmark-linux-build:
@python python/kernmlops collect -v \
-c ${KERNMLOPS_CONFIG_FILE} \
Expand Down Expand Up @@ -182,6 +187,28 @@ docker:
${IMAGE_NAME}:${VERSION} \
${CONTAINER_CMD} || true

# Benchmark Installation commands

# Must be run before using any ycsb benchmark (once)
# NOTE(review): `source` is not a POSIX sh builtin; confirm the Makefile's
# SHELL is bash, otherwise use `.` or invoke the script with bash directly.
install-ycsb:
	@echo "Installing ycsb..."
	@source scripts/setup-benchmarks/install_ycsb.sh

# Make-level export: YCSB_HOME is placed in the environment of every recipe.
# It is declared once here; an `export` inside a recipe line would only
# affect that line's own sub-shell.
export YCSB_HOME=/KernMLOps/ycsb-0.17.0

# Must be run before using any mongodb benchmark (once)
# Prints a status line, then sources scripts/setup-benchmarks/install_mongodb.sh.
# NOTE(review): `source` is not a POSIX sh builtin; confirm the Makefile's
# SHELL is bash.
install-mongodb:
@echo "Installing mongodb benchmark..."
@source scripts/setup-benchmarks/install_mongodb.sh

# Must be run before using the MongoDB benchmark each time YCSB target benchmark is changed
# Runs the YCSB `load` phase against mongodb://localhost:27017/ycsb with
# workloads/workloada, recordcount=1000000 and an acknowledged write concern.
# NOTE(review): this recipe reads YCSB_PATH, while the export next to
# install-ycsb sets YCSB_HOME; confirm YCSB_PATH is defined somewhere,
# otherwise the command expands to /bin/ycsb.
load-mongodb:
@echo "Running YCSB load..."
${YCSB_PATH}/bin/ycsb load mongodb -s -P workloads/workloada \
-p recordcount=1000000 \
-p mongodb.url=mongodb://localhost:27017/ycsb \
-p mongodb.writeConcern=acknowledged

# Miscellaneous commands
clean-docker-images:
Expand Down
2 changes: 2 additions & 0 deletions python/kernmlops/kernmlops_benchmark/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,15 @@
BenchmarkRunningError,
)
from kernmlops_benchmark.gap import GapBenchmark
from kernmlops_benchmark.mongodb import MongoDbBenchmark
from kernmlops_benchmark.linux_build import LinuxBuildBenchmark
from kernmlops_config import ConfigBase

# Registry of available benchmarks, keyed by the string each class returns
# from its name() classmethod.
benchmarks: Mapping[str, type[Benchmark]] = {
FauxBenchmark.name(): FauxBenchmark,
LinuxBuildBenchmark.name(): LinuxBuildBenchmark,
GapBenchmark.name(): GapBenchmark,
MongoDbBenchmark.name(): MongoDbBenchmark,
}

BenchmarkConfig = make_dataclass(
Expand Down
2 changes: 2 additions & 0 deletions python/kernmlops/kernmlops_benchmark/gap.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ def setup(self) -> None:
def run(self) -> None:
if self.process is not None:
raise BenchmarkRunningError()

print(str(self.benchmark_dir / self.config.gap_benchmark))
self.process = subprocess.Popen(
[
str(self.benchmark_dir / self.config.gap_benchmark),
Expand Down
90 changes: 90 additions & 0 deletions python/kernmlops/kernmlops_benchmark/mongodb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import subprocess
from dataclasses import dataclass
from typing import Literal, cast

from data_schema import GraphEngine, demote
from kernmlops_benchmark.benchmark import Benchmark, GenericBenchmarkConfig
from kernmlops_benchmark.errors import (
BenchmarkNotInCollectionData,
BenchmarkNotRunningError,
BenchmarkRunningError,
)
from kernmlops_config import ConfigBase


@dataclass(frozen=True)
class BenchmarkConfig(ConfigBase):

recordCount: int = 1000000
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use lower snake case instead of camel case, i.e. `record_count`.
Note: Pascal case is fine for class names, as you have done.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, after you make the above fix, please run `make defaults` to generate a new `defaults.yaml`, and then commit those changes.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, it does not look like these configuration options are used anywhere.
Please use them to configure the mongodb benchmark.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

make defaults is consistently failing with this:

Traceback (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/anish/KernMLOps/python/kernmlops/__main__.py", line 3, in <module>
    import cli
  File "/home/anish/KernMLOps/python/kernmlops/cli/__init__.py", line 7, in <module>
    import data_collection
  File "/home/anish/KernMLOps/python/kernmlops/data_collection/__init__.py", line 4, in <module>
    from data_collection import bpf_instrumentation as bpf
  File "/home/anish/KernMLOps/python/kernmlops/data_collection/bpf_instrumentation/__init__.py", line 5, in <module>
    from data_collection.bpf_instrumentation.bpf_hook import BPFProgram
  File "/home/anish/KernMLOps/python/kernmlops/data_collection/bpf_instrumentation/bpf_hook.py", line 4, in <module>
    from data_schema import CollectionTable
  File "/home/anish/KernMLOps/python/kernmlops/data_schema/__init__.py", line 7, in <module>
    from data_schema import perf
  File "/home/anish/KernMLOps/python/kernmlops/data_schema/perf/__init__.py", line 3, in <module>
    from data_schema.perf.perf_schema import (
  File "/home/anish/KernMLOps/python/kernmlops/data_schema/perf/perf_schema.py", line 3, in <module>
    from typing import Protocol, override
ImportError: cannot import name 'override' from 'typing' (/usr/lib/python3.10/typing.py)

readProportion: float = 0.25
updateProportion: float =0.75


class MongoDbBenchmark(Benchmark):

@classmethod
def name(cls) -> str:
return "mongodb"

@classmethod
def default_config(cls) -> ConfigBase:
    """Return a ``BenchmarkConfig`` built entirely from its field defaults."""
    defaults = BenchmarkConfig()
    return defaults

@classmethod
def from_config(cls, config: ConfigBase) -> "Benchmark":
    """Build a ``MongoDbBenchmark`` from a top-level configuration object.

    Reads the ``generic`` attribute and the attribute named after
    ``cls.name()`` from *config*. The ``cast`` calls only inform the type
    checker; they perform no runtime validation.
    """
    shared_section = getattr(config, "generic")
    mongodb_section = getattr(config, cls.name())
    return MongoDbBenchmark(
        generic_config=cast(GenericBenchmarkConfig, shared_section),
        config=cast(BenchmarkConfig, mongodb_section),
    )

def __init__(self, *, generic_config: GenericBenchmarkConfig, config: BenchmarkConfig):
self.generic_config = generic_config
self.config = config
self.benchmark_dir = self.generic_config.get_benchmark_dir() / self.name()
self.process: subprocess.Popen | None = None

def is_configured(self) -> bool:
return True
# print(f'is_configured directory name: {self.benchmark_dir}')
return self.benchmark_dir.is_dir()

def setup(self) -> None:
if self.process is not None:
raise BenchmarkRunningError()
self.generic_config.generic_setup()

def run(self) -> None:
    """Start ``run_mongodb.sh`` from the benchmark directory under bash.

    The child's stdout is discarded, and the ``Popen`` handle is kept in
    ``self.process`` so ``poll``, ``wait`` and ``kill`` can act on it.

    Raises:
        BenchmarkRunningError: if a process handle is already stored.
    """
    if self.process is not None:
        raise BenchmarkRunningError()

    # NOTE(review): nothing from self.config is forwarded to the script;
    # confirm whether run_mongodb.sh should receive those settings.
    script_path = self.benchmark_dir / "run_mongodb.sh"
    self.process = subprocess.Popen(
        ["bash", str(script_path)],
        preexec_fn=demote(),
        stdout=subprocess.DEVNULL,
    )

def poll(self) -> int | None:
if self.process is None:
raise BenchmarkNotRunningError()
return self.process.poll()

def wait(self) -> None:
if self.process is None:
raise BenchmarkNotRunningError()
self.process.wait()

def kill(self) -> None:
if self.process is None:
raise BenchmarkNotRunningError()
self.process.terminate()

@classmethod
def plot_events(cls, graph_engine: GraphEngine) -> None:
if graph_engine.collection_data.benchmark != cls.name():
raise BenchmarkNotInCollectionData()
# TODO(Patrick): plot when a trial starts/ends
Loading