Skip to content

Commit

Permalink
Document all functions
Browse files: browse the repository at this point in the history
Signed-off-by: Mihai Maruseac <[email protected]>
  • Loading branch information
mihaimaruseac committed Sep 10, 2024
1 parent 13b019a commit 938e894
Showing 1 changed file with 35 additions and 0 deletions.
35 changes: 35 additions & 0 deletions benchmarks/serialize.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -29,6 +29,17 @@
def get_hash_engine_factory(
hash_algorithm: str,
) -> type[hashing.StreamingHashEngine]:
"""Returns the class that implements a hashing method.
Args:
hash_algorithm: the hash algorithm to implement.
Returns:
The class that corresponds to the algorithm.
Raises:
ValueError: if the algorithm is not implemented/not valid.
"""
if hash_algorithm == "sha256":
return memory.SHA256
if hash_algorithm == "blake2":
Expand All @@ -39,6 +50,16 @@ def get_hash_engine_factory(
def get_sharded_file_hasher_factory(
hash_algorithm: str, chunk_size: int, shard_size: int
) -> Callable[[pathlib.Path, int, int], file.ShardedFileHasher]:
"""Returns a hasher factory for sharded serialization.
Args:
hash_algorithm: the hash algorithm to use for each shard.
chunk_size: the chunk size to use when reading shards.
shard_size: the shard size used in generating the shards.
Returns:
A callable for the hashing factory.
"""
hash_engine = get_hash_engine_factory(hash_algorithm)

def _hasher_factory(
Expand All @@ -59,6 +80,15 @@ def _hasher_factory(
def get_file_hasher_factory(
hash_algorithm: str, chunk_size: int
) -> Callable[[pathlib.Path], file.FileHasher]:
"""Returns a hasher factory for file serialization.
Args:
hash_algorithm: the hash algorithm to use for each file.
chunk_size: the chunk size to use when reading files.
Returns:
A callable for the hashing factory.
"""
hash_engine = get_hash_engine_factory(hash_algorithm)

def _hasher_factory(path: pathlib.Path) -> file.FileHasher:
Expand All @@ -72,6 +102,11 @@ def _hasher_factory(path: pathlib.Path) -> file.FileHasher:


def run(args: argparse.Namespace) -> None:
"""Performs the benchmark.
Args:
args: The arguments specifying the benchmark scenario.
"""
# 1. Hashing layer
if args.use_shards:
hasher = get_sharded_file_hasher_factory(
Expand Down

0 comments on commit 938e894

Please sign in to comment.