Commit

fix: solID -> sol_id
add: rebase against robynpy_release
marcopremier committed Nov 8, 2024
1 parent 12fc25e commit dc9201e
Showing 3 changed files with 27 additions and 39 deletions.
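
The rename matters because model selection throughout the Pareto pipeline filters DataFrames on this column; below is a minimal, hypothetical illustration of the mismatch being fixed (frame contents are made up, not Robyn's actual data):

import pandas as pd

# One frame still keyed by the old camel-case name, one by the new snake_case name.
dt_hyppar = pd.DataFrame({"solID": ["m1"], "media_alphas": [0.5]})
x_decomp_agg = pd.DataFrame({"sol_id": ["m1"], "rn": ["media"], "coef": [1.2]})

# Lookups only work once both frames agree on the key; with mixed names the
# second filter raises KeyError: 'sol_id'.
x_decomp_agg[x_decomp_agg["sol_id"] == "m1"]   # fine
try:
    dt_hyppar[dt_hyppar["sol_id"] == "m1"]     # old frame, new key
except KeyError as exc:
    print("mismatched column name:", exc)
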
17 changes: 4 additions & 13 deletions python/src/robyn/modeling/pareto/pareto_optimizer.py
@@ -265,7 +265,7 @@ def prepare_pareto_data(
for trial in chunk_trials:
if trial.decomp_spend_dist is not None:
# Select only necessary columns
required_cols = ["trial", "iterNG", "iterPar", "rn", "mean_spend", "total_spend", "xDecompAgg"]
required_cols = ["trial", "iterNG", "iterPar", "rn", "mean_spend", "total_spend", "xDecompAgg", "sol_id"]
trial_data = trial.decomp_spend_dist[
[col for col in required_cols if col in trial.decomp_spend_dist.columns]
]
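
For context, the guarded selection above is what keeps trial frames that lack some of these columns from breaking the aggregation; a self-contained sketch of the same pattern with toy data:

import pandas as pd

# Toy decomp_spend_dist frame that is missing a few of the requested columns.
decomp_spend_dist = pd.DataFrame({
    "trial": [1], "rn": ["media"], "mean_spend": [100.0], "sol_id": ["1_1_1"],
})
required_cols = ["trial", "iterNG", "iterPar", "rn", "mean_spend",
                 "total_spend", "xDecompAgg", "sol_id"]

# Keep only the required columns that actually exist, so the subset never
# raises a KeyError for columns an older run did not write.
present = [col for col in required_cols if col in decomp_spend_dist.columns]
trial_data = decomp_spend_dist[present]
print(trial_data.columns.tolist())  # ['trial', 'rn', 'mean_spend', 'sol_id']
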
@@ -302,6 +302,7 @@ def prepare_pareto_data(
self.logger.info("Using single Pareto front due to fixed hyperparameters or single model")

# Automatic Pareto front selection with memory optimization
grouped_data = None
if pareto_fronts == "auto":
n_pareto = result_hyp_param["robynPareto"].notna().sum()
self.logger.info(f"Number of Pareto-optimal solutions found: {n_pareto}")
@@ -319,7 +320,6 @@ def prepare_pareto_data(
.reset_index()
)
grouped_data["n_cum"] = grouped_data["n"].cumsum()

auto_pareto = grouped_data[grouped_data["n_cum"] >= scaled_min_candidates]

if len(auto_pareto) == 0:
@@ -369,13 +369,11 @@ def run_dt_resp(self, row: pd.Series, paretoData: ParetoData) -> Optional[dict]:
get_spendname = row["rn"]
startRW = self.mmm_data.mmmdata_spec.rolling_window_start_which
endRW = self.mmm_data.mmmdata_spec.rolling_window_end_which

response_calculator = ResponseCurveCalculator(
mmm_data=self.mmm_data,
model_outputs=self.model_outputs,
hyperparameter=self.hyper_parameter,
)

response_output: ResponseOutput = response_calculator.calculate_response(
select_model=get_sol_id,
metric_name=get_spendname,
@@ -384,7 +382,6 @@ def run_dt_resp(self, row: pd.Series, paretoData: ParetoData) -> Optional[dict]:
dt_coef=paretoData.x_decomp_agg,
quiet=True,
)

mean_spend_adstocked = np.mean(response_output.input_total[startRW:endRW])
mean_carryover = np.mean(response_output.input_carryover[startRW:endRW])

@@ -393,7 +390,6 @@ def run_dt_resp(self, row: pd.Series, paretoData: ParetoData) -> Optional[dict]:
dt_coef = paretoData.x_decomp_agg[
(paretoData.x_decomp_agg["sol_id"] == get_sol_id) & (paretoData.x_decomp_agg["rn"] == get_spendname)
][["rn", "coef"]]

hill_calculator = HillCalculator(
mmmdata=self.mmm_data,
model_outputs=self.model_outputs,
@@ -404,7 +400,6 @@ def run_dt_resp(self, row: pd.Series, paretoData: ParetoData) -> Optional[dict]:
chn_adstocked=chn_adstocked,
)
hills = hill_calculator.get_hill_params()

mean_response = ParetoUtils.calculate_fx_objective(
x=row["mean_spend"],
coeff=hills["coefs_sorted"][0],
@@ -747,7 +742,6 @@ def robyn_immcarr(
axis=1,
)
temp["sol_id"] = sol_id

vec_collect = {
"xDecompVec": temp.drop(
columns=temp.columns[temp.columns.str.endswith("_MDI") | temp.columns.str.endswith("_MDC")]
@@ -763,14 +757,12 @@ def robyn_immcarr(
]
),
}

this = vec_collect["xDecompVecImmediate"].columns.str.replace("_MDI", "", regex=False)
vec_collect["xDecompVecImmediate"].columns = this
vec_collect["xDecompVecCarryover"].columns = this

df_caov = (vec_collect["xDecompVecCarryover"].groupby("sol_id").sum().reset_index()).drop(columns="ds")
df_total = vec_collect["xDecompVec"].groupby("sol_id").sum().reset_index().drop(columns="ds")

df_caov = (vec_collect["xDecompVecCarryover"].drop(columns="ds").groupby("sol_id").sum().reset_index())
df_total = vec_collect["xDecompVec"].drop(columns="ds").groupby("sol_id").sum().reset_index()
df_caov_pct = df_caov.copy()
df_caov_pct.loc[:, df_caov_pct.columns[1:]] = df_caov_pct.loc[:, df_caov_pct.columns[1:]].div(
df_total.iloc[:, 1:].values
@@ -805,7 +797,6 @@ def robyn_immcarr(
["sol_id", "start_date", "end_date", "type"]
)["response"].transform("sum")
xDecompVecImmeCaov.fillna(0, inplace=True)

xDecompVecImmeCaov = xDecompVecImmeCaov.merge(df_caov_pct, on=["sol_id", "rn"], how="left")

return xDecompVecImmeCaov
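
The reordered drop(columns="ds") keeps the date column out of the numeric aggregation entirely; a small sketch of the same pattern on toy data (not Robyn's actual frames):

import pandas as pd

# Toy carryover decomposition: a date column plus one channel contribution.
vec = pd.DataFrame({
    "sol_id": ["m1", "m1"],
    "ds": pd.to_datetime(["2023-01-01", "2023-01-08"]),
    "media": [10.0, 30.0],
})

# Dropping the date column before aggregating means .sum() only ever sees
# numeric columns; summing datetimes is meaningless and, depending on the
# pandas version, raises instead of being silently skipped.
df_caov = vec.drop(columns="ds").groupby("sol_id").sum().reset_index()
print(df_caov)  #   sol_id  media
                # 0     m1   40.0
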
17 changes: 7 additions & 10 deletions python/src/robyn/modeling/pareto/response_curve.py
@@ -72,6 +72,7 @@ def calculate_response(
dt_hyppar: pd.DataFrame = pd.DataFrame(),
dt_coef: pd.DataFrame = pd.DataFrame(),
) -> ResponseOutput:

# Determine the use case based on input parameters
usecase = self._which_usecase(metric_value, date_range)

@@ -88,7 +89,6 @@ def calculate_response(
val_list = self._check_metric_value(
metric_value, metric_name, all_values, ds_list.metric_loc
)

date_range_updated = ds_list.date_range_updated
metric_value_updated = val_list.metric_value_updated
all_values_updated = val_list.all_values_updated
@@ -129,14 +129,12 @@ def calculate_response(
input_immediate = x_list_sim.x[ds_list.metric_loc]
input_carryover = input_total - input_immediate


# Get saturation parameters and apply saturation
hill_params = self._get_saturation_params(select_model, hpm_name, dt_hyppar)

m_adstockedRW = x_list.x_decayed[
self.mmm_data.mmmdata_spec.rolling_window_start_which : self.mmm_data.mmmdata_spec.rolling_window_end_which
]

if usecase == UseCase.ALL_HISTORICAL_VEC:
metric_saturated_total = self.transformation.saturation_hill(
m_adstockedRW, hill_params.alphas[0], hill_params.gammas[0]
@@ -157,12 +155,11 @@ def calculate_response(
hill_params.gammas[0],
x_marginal=input_carryover,
)

metric_saturated_immediate = metric_saturated_total - metric_saturated_carryover

# Calculate final response values
coeff = dt_coef[
(dt_coef["solID"] == select_model) & (dt_coef["rn"] == hpm_name)
(dt_coef["sol_id"] == select_model) & (dt_coef["rn"] == hpm_name)
]["coef"].values[0]
m_saturated = self.transformation.saturation_hill(
m_adstockedRW, hill_params.alphas[0], hill_params.gammas[0]
@@ -319,18 +316,18 @@ def _get_channel_hyperparams(
params = ChannelHyperparameters()

if adstock_type == AdstockType.GEOMETRIC:
params.thetas = dt_hyppar[dt_hyppar["solID"] == select_model][
params.thetas = dt_hyppar[dt_hyppar["sol_id"] == select_model][
f"{hpm_name}_thetas"
].values
elif adstock_type in [
AdstockType.WEIBULL,
AdstockType.WEIBULL_CDF,
AdstockType.WEIBULL_PDF,
]:
params.shapes = dt_hyppar[dt_hyppar["solID"] == select_model][
params.shapes = dt_hyppar[dt_hyppar["sol_id"] == select_model][
f"{hpm_name}_shapes"
].values
params.scales = dt_hyppar[dt_hyppar["solID"] == select_model][
params.scales = dt_hyppar[dt_hyppar["sol_id"] == select_model][
f"{hpm_name}_scales"
].values

@@ -340,10 +337,10 @@ def _get_saturation_params(
self, select_model: str, hpm_name: str, dt_hyppar: pd.DataFrame
) -> ChannelHyperparameters:
params = ChannelHyperparameters()
params.alphas = dt_hyppar[dt_hyppar["solID"] == select_model][
params.alphas = dt_hyppar[dt_hyppar["sol_id"] == select_model][
f"{hpm_name}_alphas"
].values
params.gammas = dt_hyppar[dt_hyppar["solID"] == select_model][
params.gammas = dt_hyppar[dt_hyppar["sol_id"] == select_model][
f"{hpm_name}_gammas"
].values
return params
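
These selectors return the per-channel alphas and gammas keyed by sol_id; for reference, a hedged sketch of how such parameters typically feed the Hill saturation step (the formula mirrors Robyn's saturation_hill, but treat this standalone version as an approximation):

import numpy as np
import pandas as pd

def saturation_hill(x, alpha, gamma, x_marginal=None):
    # Hill-type S-curve: the inflexion point is a gamma-weighted blend of the
    # series' min and max, so gamma in (0, 1) shifts where saturation kicks in.
    inflexion = np.dot([1 - gamma, gamma], [x.min(), x.max()])
    x_eval = x if x_marginal is None else x_marginal
    return x_eval ** alpha / (x_eval ** alpha + inflexion ** alpha)

# Wide, per-channel hyperparameter layout the selectors above expect
# (hypothetical values; only the column names matter here).
dt_hyppar = pd.DataFrame({"sol_id": ["m1"], "media_alphas": [2.0], "media_gammas": [0.5]})
row = dt_hyppar[dt_hyppar["sol_id"] == "m1"]
alpha = row["media_alphas"].values[0]
gamma = row["media_gammas"].values[0]

adstocked_spend = np.array([0.0, 50.0, 100.0, 200.0])
print(saturation_hill(adstocked_spend, alpha, gamma))  # [0.  0.2 0.5 0.8]
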
32 changes: 16 additions & 16 deletions python/tests/modeling/pareto/test_pareto_optimizer.py
@@ -22,11 +22,11 @@
def pareto_data_data_factory():
def _create_pareto_data(aggregated_data = None):
decomp_spend_dist_df = pd.DataFrame({
'solID': ['test'],
'sol_id': ['test'],
'rn': ['media'],
})
dt_hyppar = pd.DataFrame({
'solID': ['test'],
'sol_id': ['test'],
'media_alphas': [1],
'media_gammas': [2],
})
@@ -47,7 +47,7 @@ def _create_trial_mock():
"trial": [1, 2, 3],
"iterNG": [1, 1, 1],
"iterPar": [1, 2, 3],
"solID": ["sol1", "sol2", "sol3"]
"sol_id": ["test", "test2", "test3"]
})
return trial_mock
return _create_trial_mock
@@ -63,12 +63,12 @@ def _create_aggregated_data():
"nrmse_train": [1, 2, 3],
"iterNG": [1, 2, 3],
"iterPar": [1, 2, 3],
"solID": ["sol1", "sol2", "sol3"],
"sol_id": ["test", "test2", "test3"],
"robynPareto": [1, 2, 3],
}),
"x_decomp_agg": pd.DataFrame({
"rn": ["media", "media", "media"],
"solID": ["test", "test2", "test3"],
"sol_id": ["test", "test2", "test3"],
"coef": [1, 0.5, 0.5]
}),
"result_calibration": None,
@@ -240,7 +240,7 @@ def test_prepare_pareto_data_hyper_fixed_false(setup_optimizer, aggregated_data_
def test_run_dt_resp(mock_transform_adstock, setup_optimizer, aggregated_data_factory, pareto_data_data_factory):
optimizer = setup_optimizer
# Setup mock data
row = pd.Series({"solID": "test", "rn": "media", "mean_spend": 1})
row = pd.Series({"sol_id": "test", "rn": "media", "mean_spend": 1})
aggregated_data = aggregated_data_factory()
adstock_result = MagicMock(spec=AdstockResult)
adstock_result.x = pd.Series([1, 2, 3])
@@ -250,15 +250,15 @@ def test_run_dt_resp(mock_transform_adstock, setup_optimizer, aggregated_data_fa
# Run the run_dt_resp function
result = optimizer.run_dt_resp(row=row, paretoData=pareto_data_data_factory(aggregated_data))
# Assertions to check the return value
expected_result = pd.Series({
"mean_response": 0.333333,
"mean_spend_adstocked": 5.0,
"mean_carryover": 3.0,
expected_result = {
"mean_response": np.float64(0.3333333333333333),
"mean_spend_adstocked": np.float64(5.0),
"mean_carryover": np.float64(3.0),
"rn": "media",
"solID": "test"
})
pd.testing.assert_series_equal(result, expected_result)
assert isinstance(result, pd.Series)
"sol_id": "test"
}
assert result == expected_result
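
The rewritten assertion compares a plain dict; a short sketch of why exact equality against np.float64 values works, plus a more tolerant variant (assuming pytest, which this suite already uses):

import numpy as np
import pytest

# np.float64 compares equal to the corresponding Python float, so a plain dict
# equality check passes as long as the values are reproduced bit-for-bit.
assert {"mean_response": np.float64(1.0) / 3} == {"mean_response": 0.3333333333333333}

# If exact reproduction ever becomes fragile, wrap the expected dict in
# pytest.approx for a tolerance-based comparison with the same shape.
assert {"mean_response": np.float64(1.0) / 3} == pytest.approx({"mean_response": 1 / 3})
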


def test_generate_plot_data(setup_optimizer, aggregated_data_factory, pareto_data_data_factory):
optimizer = setup_optimizer
Expand Down Expand Up @@ -291,7 +291,7 @@ def test_robyn_immcarr(setup_optimizer, aggregated_data_factory, pareto_data_dat
})
optimizer.hyper_parameter.adstock = AdstockType.GEOMETRIC
result_hyp_param = pd.DataFrame({
'solID': ['test'],
'sol_id': ['test'],
'media_alphas': [0.1],
'media_gammas': [0.2],
'media_thetas': [0.3] # Include this if adstock is GEOMETRIC
@@ -303,7 +303,7 @@ def test_robyn_immcarr(setup_optimizer, aggregated_data_factory, pareto_data_dat
# Assertions to check the return value
assert isinstance(result, pd.DataFrame)
expected_result = pd.DataFrame({
"solID": ["test", "test"],
"sol_id": ["test", "test"],
"start_date": ["2023-01-01", "2023-01-01"],
"end_date": ["2023-01-10", "2023-01-10"],
"rn": ["media", "media"],
