Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Message distribution plot improvement #47

Merged
merged 3 commits into from
Jan 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 41 additions & 7 deletions src/mesh_analysis/waku_message_log_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
import base64
import logging
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patheffects as path_effects
import numpy as np
import seaborn as sns
from concurrent.futures import ProcessPoolExecutor, as_completed
from pathlib import Path
from typing import List, Dict
Expand All @@ -12,6 +16,7 @@
from src.mesh_analysis.readers.file_reader import FileReader
from src.mesh_analysis.readers.victoria_reader import VictoriaReader
from src.mesh_analysis.tracers.waku_tracer import WakuTracer
from src.plotting.utils import add_boxplot_stat_labels
from src.utils import file_utils, log_utils, path_utils, list_utils

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -324,12 +329,19 @@ def analyze_message_timestamps(self, time_difference_threshold: int):
logger.info(f'{file}: {jump[0]} to {jump[1]} -> {jump[2]}')


def plot_message_distribution(self):
import matplotlib.pyplot as plt
import seaborn as sns
def plot_message_distribution(self, received_summary_path: Path, plot_title: str, dump_path: Path) -> Result[None, str]:
"""
Note that this function assumes that analyze_message_logs has been called, since timestamps will be checked
from logs.
"""
if not received_summary_path.exists():
error = f'Received summary file {received_summary_path} does not exist'
logger.error(error)
return Err(error)

sns.set_theme()

df = pd.read_csv('local_data/mixed_enviroment/summary/received.csv', parse_dates=['timestamp'])
df = pd.read_csv(received_summary_path, parse_dates=['timestamp'])
df.set_index(['shard', 'msg_hash', 'timestamp'], inplace=True)

time_ranges = df.groupby(level='msg_hash').apply(
Expand All @@ -340,10 +352,32 @@ def plot_message_distribution(self):
time_ranges_df = time_ranges.reset_index(name='time_to_reach')

plt.figure(figsize=(12, 6))
sns.boxplot(x='time_to_reach', data=time_ranges_df, color='skyblue')
ax = sns.boxplot(x='time_to_reach', data=time_ranges_df, color='skyblue', whis=(0,100))

add_boxplot_stat_labels(ax, value_type="min")
add_boxplot_stat_labels(ax, value_type="max")
add_boxplot_stat_labels(ax, value_type="median")

q1 = np.percentile(time_ranges_df['time_to_reach'], 25)
q3 = np.percentile(time_ranges_df['time_to_reach'], 75)

text = ax.text(y=-0.1, x=q1, s=f'{q1:.3f}', ha='center', va='center',
fontweight='bold', color='white', size=10)
text.set_path_effects([
path_effects.Stroke(linewidth=3, foreground=ax.get_lines()[0].get_color()),
path_effects.Normal(),
])
text = ax.text(y=-0.1, x=q3, s=f'{q3:.3f}', ha='center', va='center',
fontweight='bold', color='white', size=10)
text.set_path_effects([
path_effects.Stroke(linewidth=3, foreground=ax.get_lines()[0].get_color()),
path_effects.Normal(),
])

plt.xlabel('Time to Reach All Nodes (seconds)')
plt.title('210 Nodes - 1msg/s - 1KB - 600 messages \n Message time distribution')
plt.title(plot_title)

plt.savefig("distribution-mixed")
plt.savefig(dump_path)
plt.show()

return Ok(None)
55 changes: 13 additions & 42 deletions src/plotting/plotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patheffects as path_effects
from typing import List, Dict
from matplotlib import ticker

# Project Imports
from src.data.data_handler import DataHandler
from src.data.data_file_handler import DataFileHandler
from src.plotting.utils import add_boxplot_stat_labels

logger = logging.getLogger(__name__)
sns.set_theme()
Expand Down Expand Up @@ -67,53 +67,24 @@ def _add_subplot_df_to_axs(self, df: pd.DataFrame, index: int, axs: np.ndarray,
box_plot.xaxis.set_tick_params(rotation=45)
box_plot.legend(loc='upper right', bbox_to_anchor=(1, 1))

self._add_stat_labels(box_plot)
result = add_boxplot_stat_labels(box_plot)
if result.is_err():
logger.error(result.err_value)

show_min_max = plot_specs.get("show_min_max", False)
if show_min_max:
self._add_stat_labels(box_plot, value_type="min")
self._add_stat_labels(box_plot, value_type="max")
result = add_boxplot_stat_labels(box_plot, value_type="min")
if result.is_err():
logger.error(result.err_value)

result = add_boxplot_stat_labels(box_plot, value_type="max")
if result.is_err():
logger.error(result.err_value)

def _create_subplot_paths_group(self, plot_specs: Dict) -> List:
subplot_path = [[f"{folder}{data}" for folder in plot_specs["folder"]] for data in
plot_specs["data"]]

return subplot_path

def _add_stat_labels(self, ax: plt.Axes, fmt: str = ".3f", value_type: str = "median") -> None:
    # Refactor from https://stackoverflow.com/a/63295846
    """
    Add text labels to the median, minimum, or maximum lines of a seaborn boxplot.

    Args:
        ax: plt.Axes, e.g., the return value of sns.boxplot()
        fmt: Format spec applied to each labelled value (after it has been divided by 1000).
        value_type: The type of value to label. Can be 'median', 'min', or 'max'.

    Raises:
        ValueError: If value_type is not 'median', 'min', or 'max'.

    If the axes contain no recognisable boxes, nothing is drawn and the method returns quietly.
    """
    # Position of the labelled line relative to the median line of each box:
    # the min line comes 2 positions before the median, the max line 1 position before.
    offsets = {"median": 0, "min": -2, "max": -1}
    if value_type not in offsets:
        raise ValueError("Invalid value_type. Must be 'min', 'max', or 'median'.")

    lines = ax.get_lines()
    boxes = [c for c in ax.get_children() if "Patch" in str(c)]  # Get box patches
    start = 4
    if not boxes:  # seaborn v0.13 or above (no patches => need to shift index)
        boxes = [c for c in lines if len(c.get_xdata()) == 5]
        start += 1

    # Without boxes the division below would raise ZeroDivisionError, and with fewer
    # lines than boxes the slice step would be 0; in both cases there is nothing to label.
    if not boxes or len(lines) < len(boxes):
        return

    lines_per_box = len(lines) // len(boxes)
    line_idx = start + offsets[value_type]

    for value_line in lines[line_idx::lines_per_box]:
        x, y = (data.mean() for data in value_line.get_data())
        # choose value depending on horizontal or vertical plot orientation
        value = x if len(set(value_line.get_xdata())) == 1 else y
        # NOTE(review): the value is divided by 1000 before formatting, presumably a
        # unit conversion (e.g. ms -> s); confirm against the plotted data.
        text = ax.text(x, y, f'{value / 1000:{fmt}}', ha='center', va='center',
                       fontweight='bold', color='white', size=10)
        # create colored border around white text for contrast
        text.set_path_effects([
            path_effects.Stroke(linewidth=3, foreground=value_line.get_color()),
            path_effects.Normal(),
        ])

48 changes: 48 additions & 0 deletions src/plotting/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Python Imports
import matplotlib.pyplot as plt
import matplotlib.patheffects as path_effects
from result import Err, Result, Ok

# Project Imports


def add_boxplot_stat_labels(ax: plt.Axes, fmt: str = ".3f", value_type: str = "median") -> Result[None, str]:
    # Refactor from https://stackoverflow.com/a/63295846
    """
    Add text labels to the median, minimum, or maximum lines of a seaborn boxplot.

    Args:
        ax: plt.Axes, e.g., the return value of sns.boxplot()
        fmt: Format spec applied to each labelled value (after it has been divided by 1000).
        value_type: The type of value to label. Can be 'median', 'min', or 'max'.

    Returns:
        Ok(None) once the labels have been added.
        Err(message) if value_type is invalid or the axes contain no boxes to label.
    """
    # Position of the labelled line relative to the median line of each box:
    # the min line comes 2 positions before the median, the max line 1 position before.
    offsets = {"median": 0, "min": -2, "max": -1}
    if value_type not in offsets:
        return Err("Invalid value_type. Must be 'min', 'max', or 'median'.")

    lines = ax.get_lines()
    boxes = [c for c in ax.get_children() if "Patch" in str(c)]  # Get box patches
    start = 4
    if not boxes:  # seaborn v0.13 or above (no patches => need to shift index)
        boxes = [c for c in lines if len(c.get_xdata()) == 5]
        start += 1

    # Without boxes the division below would raise ZeroDivisionError, and with fewer
    # lines than boxes the slice step would be 0; report both through the Result
    # instead of crashing the caller.
    if not boxes or len(lines) < len(boxes):
        return Err("No boxplot elements found in the given axes; nothing to label.")

    lines_per_box = len(lines) // len(boxes)
    line_idx = start + offsets[value_type]

    for value_line in lines[line_idx::lines_per_box]:
        x, y = (data.mean() for data in value_line.get_data())
        # choose value depending on horizontal or vertical plot orientation
        value = x if len(set(value_line.get_xdata())) == 1 else y
        # NOTE(review): the value is divided by 1000 before formatting, presumably a
        # unit conversion (e.g. ms -> s); confirm against the plotted data.
        text = ax.text(x, y, f'{value / 1000:{fmt}}', ha='center', va='center',
                       fontweight='bold', color='white', size=10)
        # create colored border around white text for contrast
        text.set_path_effects([
            path_effects.Stroke(linewidth=3, foreground=value_line.get_color()),
            path_effects.Normal(),
        ])

    return Ok(None)