diff --git a/python/src/robyn/modeling/pareto/pareto_optimizer.py b/python/src/robyn/modeling/pareto/pareto_optimizer.py index 77b6d4a57..d2e78ee3d 100644 --- a/python/src/robyn/modeling/pareto/pareto_optimizer.py +++ b/python/src/robyn/modeling/pareto/pareto_optimizer.py @@ -265,7 +265,7 @@ def prepare_pareto_data( for trial in chunk_trials: if trial.decomp_spend_dist is not None: # Select only necessary columns - required_cols = ["trial", "iterNG", "iterPar", "rn", "mean_spend", "total_spend", "xDecompAgg"] + required_cols = ["trial", "iterNG", "iterPar", "rn", "mean_spend", "total_spend", "xDecompAgg", "sol_id"] trial_data = trial.decomp_spend_dist[ [col for col in required_cols if col in trial.decomp_spend_dist.columns] ] @@ -302,6 +302,7 @@ def prepare_pareto_data( self.logger.info("Using single Pareto front due to fixed hyperparameters or single model") # Automatic Pareto front selection with memory optimization + grouped_data = None if pareto_fronts == "auto": n_pareto = result_hyp_param["robynPareto"].notna().sum() self.logger.info(f"Number of Pareto-optimal solutions found: {n_pareto}") @@ -319,7 +320,6 @@ def prepare_pareto_data( .reset_index() ) grouped_data["n_cum"] = grouped_data["n"].cumsum() - auto_pareto = grouped_data[grouped_data["n_cum"] >= scaled_min_candidates] if len(auto_pareto) == 0: @@ -369,13 +369,11 @@ def run_dt_resp(self, row: pd.Series, paretoData: ParetoData) -> Optional[dict]: get_spendname = row["rn"] startRW = self.mmm_data.mmmdata_spec.rolling_window_start_which endRW = self.mmm_data.mmmdata_spec.rolling_window_end_which - response_calculator = ResponseCurveCalculator( mmm_data=self.mmm_data, model_outputs=self.model_outputs, hyperparameter=self.hyper_parameter, ) - response_output: ResponseOutput = response_calculator.calculate_response( select_model=get_sol_id, metric_name=get_spendname, @@ -384,7 +382,6 @@ def run_dt_resp(self, row: pd.Series, paretoData: ParetoData) -> Optional[dict]: dt_coef=paretoData.x_decomp_agg, quiet=True, ) - mean_spend_adstocked = np.mean(response_output.input_total[startRW:endRW]) mean_carryover = np.mean(response_output.input_carryover[startRW:endRW]) @@ -393,7 +390,6 @@ def run_dt_resp(self, row: pd.Series, paretoData: ParetoData) -> Optional[dict]: dt_coef = paretoData.x_decomp_agg[ (paretoData.x_decomp_agg["sol_id"] == get_sol_id) & (paretoData.x_decomp_agg["rn"] == get_spendname) ][["rn", "coef"]] - hill_calculator = HillCalculator( mmmdata=self.mmm_data, model_outputs=self.model_outputs, @@ -404,7 +400,6 @@ def run_dt_resp(self, row: pd.Series, paretoData: ParetoData) -> Optional[dict]: chn_adstocked=chn_adstocked, ) hills = hill_calculator.get_hill_params() - mean_response = ParetoUtils.calculate_fx_objective( x=row["mean_spend"], coeff=hills["coefs_sorted"][0], @@ -747,7 +742,6 @@ def robyn_immcarr( axis=1, ) temp["sol_id"] = sol_id - vec_collect = { "xDecompVec": temp.drop( columns=temp.columns[temp.columns.str.endswith("_MDI") | temp.columns.str.endswith("_MDC")] @@ -763,14 +757,12 @@ def robyn_immcarr( ] ), } - this = vec_collect["xDecompVecImmediate"].columns.str.replace("_MDI", "", regex=False) vec_collect["xDecompVecImmediate"].columns = this vec_collect["xDecompVecCarryover"].columns = this - df_caov = (vec_collect["xDecompVecCarryover"].groupby("sol_id").sum().reset_index()).drop(columns="ds") - df_total = vec_collect["xDecompVec"].groupby("sol_id").sum().reset_index().drop(columns="ds") - + df_caov = (vec_collect["xDecompVecCarryover"].drop(columns="ds").groupby("sol_id").sum().reset_index()) + df_total = vec_collect["xDecompVec"].drop(columns="ds").groupby("sol_id").sum().reset_index() df_caov_pct = df_caov.copy() df_caov_pct.loc[:, df_caov_pct.columns[1:]] = df_caov_pct.loc[:, df_caov_pct.columns[1:]].div( df_total.iloc[:, 1:].values @@ -805,7 +797,6 @@ def robyn_immcarr( ["sol_id", "start_date", "end_date", "type"] )["response"].transform("sum") xDecompVecImmeCaov.fillna(0, inplace=True) - xDecompVecImmeCaov = xDecompVecImmeCaov.merge(df_caov_pct, on=["sol_id", "rn"], how="left") return xDecompVecImmeCaov diff --git a/python/src/robyn/modeling/pareto/response_curve.py b/python/src/robyn/modeling/pareto/response_curve.py index 266e2d164..56396d206 100644 --- a/python/src/robyn/modeling/pareto/response_curve.py +++ b/python/src/robyn/modeling/pareto/response_curve.py @@ -72,6 +72,7 @@ def calculate_response( dt_hyppar: pd.DataFrame = pd.DataFrame(), dt_coef: pd.DataFrame = pd.DataFrame(), ) -> ResponseOutput: + # Determine the use case based on input parameters usecase = self._which_usecase(metric_value, date_range) @@ -88,7 +89,6 @@ def calculate_response( val_list = self._check_metric_value( metric_value, metric_name, all_values, ds_list.metric_loc ) - date_range_updated = ds_list.date_range_updated metric_value_updated = val_list.metric_value_updated all_values_updated = val_list.all_values_updated @@ -129,14 +129,12 @@ def calculate_response( input_immediate = x_list_sim.x[ds_list.metric_loc] input_carryover = input_total - input_immediate - # Get saturation parameters and apply saturation hill_params = self._get_saturation_params(select_model, hpm_name, dt_hyppar) m_adstockedRW = x_list.x_decayed[ self.mmm_data.mmmdata_spec.rolling_window_start_which : self.mmm_data.mmmdata_spec.rolling_window_end_which ] - if usecase == UseCase.ALL_HISTORICAL_VEC: metric_saturated_total = self.transformation.saturation_hill( m_adstockedRW, hill_params.alphas[0], hill_params.gammas[0] @@ -157,12 +155,11 @@ def calculate_response( hill_params.gammas[0], x_marginal=input_carryover, ) - metric_saturated_immediate = metric_saturated_total - metric_saturated_carryover # Calculate final response values coeff = dt_coef[ - (dt_coef["solID"] == select_model) & (dt_coef["rn"] == hpm_name) + (dt_coef["sol_id"] == select_model) & (dt_coef["rn"] == hpm_name) ]["coef"].values[0] m_saturated = self.transformation.saturation_hill( m_adstockedRW, hill_params.alphas[0], hill_params.gammas[0] @@ -319,7 +316,7 @@ def _get_channel_hyperparams( params = ChannelHyperparameters() if adstock_type == AdstockType.GEOMETRIC: - params.thetas = dt_hyppar[dt_hyppar["solID"] == select_model][ + params.thetas = dt_hyppar[dt_hyppar["sol_id"] == select_model][ f"{hpm_name}_thetas" ].values elif adstock_type in [ @@ -327,10 +324,10 @@ def _get_channel_hyperparams( AdstockType.WEIBULL_CDF, AdstockType.WEIBULL_PDF, ]: - params.shapes = dt_hyppar[dt_hyppar["solID"] == select_model][ + params.shapes = dt_hyppar[dt_hyppar["sol_id"] == select_model][ f"{hpm_name}_shapes" ].values - params.scales = dt_hyppar[dt_hyppar["solID"] == select_model][ + params.scales = dt_hyppar[dt_hyppar["sol_id"] == select_model][ f"{hpm_name}_scales" ].values @@ -340,10 +337,10 @@ def _get_saturation_params( self, select_model: str, hpm_name: str, dt_hyppar: pd.DataFrame ) -> ChannelHyperparameters: params = ChannelHyperparameters() - params.alphas = dt_hyppar[dt_hyppar["solID"] == select_model][ + params.alphas = dt_hyppar[dt_hyppar["sol_id"] == select_model][ f"{hpm_name}_alphas" ].values - params.gammas = dt_hyppar[dt_hyppar["solID"] == select_model][ + params.gammas = dt_hyppar[dt_hyppar["sol_id"] == select_model][ f"{hpm_name}_gammas" ].values return params diff --git a/python/tests/modeling/pareto/test_pareto_optimizer.py b/python/tests/modeling/pareto/test_pareto_optimizer.py index 0caf26bb9..91213f8ec 100644 --- a/python/tests/modeling/pareto/test_pareto_optimizer.py +++ b/python/tests/modeling/pareto/test_pareto_optimizer.py @@ -22,11 +22,11 @@ def pareto_data_data_factory(): def _create_pareto_data(aggregated_data = None): decomp_spend_dist_df = pd.DataFrame({ - 'solID': ['test'], + 'sol_id': ['test'], 'rn': ['media'], }) dt_hyppar = pd.DataFrame({ - 'solID': ['test'], + 'sol_id': ['test'], 'media_alphas': [1], 'media_gammas': [2], }) @@ -47,7 +47,7 @@ def _create_trial_mock(): "trial": [1, 2, 3], "iterNG": [1, 1, 1], "iterPar": [1, 2, 3], - "solID": ["sol1", "sol2", "sol3"] + "sol_id": ["test", "test2", "test3"] }) return trial_mock return _create_trial_mock @@ -63,12 +63,12 @@ def _create_aggregated_data(): "nrmse_train": [1, 2, 3], "iterNG": [1, 2, 3], "iterPar": [1, 2, 3], - "solID": ["sol1", "sol2", "sol3"], + "sol_id": ["test", "test2", "test3"], "robynPareto": [1, 2, 3], }), "x_decomp_agg": pd.DataFrame({ "rn": ["media", "media", "media"], - "solID": ["test", "test2", "test3"], + "sol_id": ["test", "test2", "test3"], "coef": [1, 0.5, 0.5] }), "result_calibration": None, @@ -240,7 +240,7 @@ def test_prepare_pareto_data_hyper_fixed_false(setup_optimizer, aggregated_data_ def test_run_dt_resp(mock_transform_adstock, setup_optimizer, aggregated_data_factory, pareto_data_data_factory): optimizer = setup_optimizer # Setup mock data - row = pd.Series({"solID": "test", "rn": "media", "mean_spend": 1}) + row = pd.Series({"sol_id": "test", "rn": "media", "mean_spend": 1}) aggregated_data = aggregated_data_factory() adstock_result = MagicMock(spec=AdstockResult) adstock_result.x = pd.Series([1, 2, 3]) @@ -250,15 +250,15 @@ def test_run_dt_resp(mock_transform_adstock, setup_optimizer, aggregated_data_fa # Run the run_dt_resp function result = optimizer.run_dt_resp(row=row, paretoData=pareto_data_data_factory(aggregated_data)) # Assertions to check the return value - expected_result = pd.Series({ - "mean_response": 0.333333, - "mean_spend_adstocked": 5.0, - "mean_carryover": 3.0, + expected_result = { + "mean_response": np.float64(0.3333333333333333), + "mean_spend_adstocked": np.float64(5.0), + "mean_carryover": np.float64(3.0), "rn": "media", - "solID": "test" - }) - pd.testing.assert_series_equal(result, expected_result) - assert isinstance(result, pd.Series) + "sol_id": "test" + } + assert result == expected_result + def test_generate_plot_data(setup_optimizer, aggregated_data_factory, pareto_data_data_factory): optimizer = setup_optimizer @@ -291,7 +291,7 @@ def test_robyn_immcarr(setup_optimizer, aggregated_data_factory, pareto_data_dat }) optimizer.hyper_parameter.adstock = AdstockType.GEOMETRIC result_hyp_param = pd.DataFrame({ - 'solID': ['test'], + 'sol_id': ['test'], 'media_alphas': [0.1], 'media_gammas': [0.2], 'media_thetas': [0.3] # Include this if adstock is GEOMETRIC @@ -303,7 +303,7 @@ def test_robyn_immcarr(setup_optimizer, aggregated_data_factory, pareto_data_dat # Assertions to check the return value assert isinstance(result, pd.DataFrame) expected_result = pd.DataFrame({ - "solID": ["test", "test"], + "sol_id": ["test", "test"], "start_date": ["2023-01-01", "2023-01-01"], "end_date": ["2023-01-10", "2023-01-10"], "rn": ["media", "media"],