Skip to content

Commit

Permalink
add client duplication
Browse files Browse the repository at this point in the history
  • Loading branch information
CrossNox committed Jul 8, 2022
1 parent 39e627e commit c2f30ca
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 2 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [3.4.0] - 2022-07-07
### Added
- Client can now send duplicate messages

## [3.3.0] - 2022-07-07
### Added
- CLI parameter where to download results to
Expand Down
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ down:
another-client:
$(DOCKER_BIN) run --network lazarus_net -v $(CURDIR)/settings.ini:/app/settings.ini:ro -v $(CURDIR)/data:/data:ro --entrypoint poetry 7574-tp3:latest run virgilius -vv /data/the-reddit-irl-dataset-posts-reduced.csv /data/the-reddit-irl-dataset-comments-reduced.csv server_0 server_1 server_2

duplicates-client:
$(DOCKER_BIN) run --network lazarus_net -v $(CURDIR)/settings.ini:/app/settings.ini:ro -v $(CURDIR)/data:/data:ro --entrypoint poetry 7574-tp3:latest run virgilius -vv /data/the-reddit-irl-dataset-posts-reduced.csv /data/the-reddit-irl-dataset-comments-reduced.csv server_0 server_1 server_2 --duplicates 1.00

docker-image:
$(DOCKER_BIN) build -f ./docker/Dockerfile -t "7574-tp3:latest" .
$(DOCKER_BIN) build -f ./rabbitmq/Dockerfile -t "rabbitmq:latest" .
Expand Down
5 changes: 4 additions & 1 deletion lazarus/client/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ def main(
cfg.lazarus.download_dir(default=DEFAULT_MEME_PATH),
help="Path to download meme to",
),
duplicates: float = typer.Option(
0.0, min=0.0, max=1.0, help="The probability of sending twice each row"
),
verbose: int = typer.Option(
DEFAULT_VERBOSE,
"--verbose",
Expand All @@ -36,6 +39,6 @@ def main(
"""Client entrypoint."""
config_logging(verbose, pretty)

client = Client(hosts, posts, comments, download_dir)
client = Client(hosts, posts, comments, download_dir, duplicates=duplicates)

client.run()
5 changes: 5 additions & 0 deletions lazarus/client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,14 @@ def __init__(
posts_path: Path,
comments_path: Path,
download_dir: Path,
duplicates: float = 0.0,
):
self.download_dir = download_dir
if not self.download_dir.exists():
self.download_dir.mkdir(parents=True, exist_ok=True)

self.duplicates = duplicates

self.hosts = hosts
self.posts_path = posts_path
self.comments_path = comments_path
Expand All @@ -66,6 +69,7 @@ def run(self):
posts_exchange,
self.posts_path,
posts_groups or [],
self.duplicates,
)

pcomments = FileProvider(
Expand All @@ -74,6 +78,7 @@ def run(self):
comments_exchange,
self.comments_path,
comments_groups or [],
self.duplicates,
)

logger.info("Starting posts relay process")
Expand Down
6 changes: 6 additions & 0 deletions lazarus/client/file_provider.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import csv
import random
from typing import List
from pathlib import Path
from multiprocessing import Process
Expand All @@ -23,6 +24,7 @@ def __init__(
exchange: str,
file_path: Path,
groups: List[str],
duplicates: float = 0.0,
):
super().__init__()

Expand All @@ -31,6 +33,7 @@ def __init__(
self.file_path = file_path
self.exchange_name = exchange
self.groups = groups
self.duplicates = duplicates

def run(self):
try:
Expand Down Expand Up @@ -61,6 +64,9 @@ def run(self):
msg = Message(data=m)
for exch in exchanges:
exch.push(msg)
if random.random() < self.duplicates:
logger.info("Sending duplicate message")
exch.push(msg)
if count % 1000 == 0:
logger.info(f"Sent {count} messages")
m = {"type": EOS, "session_id": self.session_id, "id": "client"}
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "lazarus"
version = "3.3.0"
version = "3.4.0"
description = "High Availability Reddit Memes Analyzer, for fun (and passing Distribuidos I)."
authors = ["CrossNox <[email protected]>", "FdelMazo <[email protected]>", "JulianVentura <[email protected]>"]

Expand Down

0 comments on commit c2f30ca

Please sign in to comment.