Skip to content

Commit

Permalink
Stop using the Boston housing dataset
Browse files Browse the repository at this point in the history
Instead we use the California housing dataset.
  • Loading branch information
betatim committed Dec 12, 2024
1 parent 84a858b commit 0112051
Showing 1 changed file with 7 additions and 38 deletions.
45 changes: 7 additions & 38 deletions python/cuml/cuml/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,48 +247,17 @@ def housing_dataset():
return X, y, feature_names


@functools.cache
def get_boston_data():
    """Download the Boston house-prices CSV and return it as a DataFrame.

    The file is read with ``header=None``, so every row of the CSV
    (including any leading metadata rows) shows up as a data row and the
    columns are labelled with integers.

    The download is attempted up to ``n_retries`` times with a one second
    pause between attempts. Because of ``functools.cache`` a successful
    result is downloaded only once per process; a failure raises and is
    therefore not cached, so a later call tries again.

    Returns
    -------
    pandas.DataFrame
        The raw, header-less contents of the CSV file.

    Raises
    ------
    RuntimeError
        If every attempt fails. The error of the last attempt is attached
        as ``__cause__`` to make the root cause visible in tracebacks.
    """
    n_retries = 3
    url = "https://raw.githubusercontent.com/scikit-learn/scikit-learn/baf828ca126bcb2c0ad813226963621cafe38adb/sklearn/datasets/data/boston_house_prices.csv"  # noqa: E501
    last_error = None
    for attempt in range(n_retries):
        try:
            return pd.read_csv(url, header=None)
        except Exception as exc:
            # Deliberately broad: network, HTTP and parsing errors are all
            # treated as "try again".
            last_error = exc
            # Only wait when another attempt follows; sleeping after the
            # final failure would just delay the error.
            if attempt < n_retries - 1:
                time.sleep(1)
    raise RuntimeError(
        f"Failed to download file from {url} after {n_retries} retries."
    ) from last_error


@pytest.fixture(scope="session")
def deprecated_boston_dataset():
    """Session-scoped fixture providing the Boston housing data as a Bunch.

    The dataset was removed in Scikit-learn 1.2, so it is downloaded via
    ``get_boston_data`` instead. It should eventually be replaced by a
    better dataset for tests, see
    https://github.com/rapidsai/cuml/issues/5158

    If the download fails, the requesting test is marked as an expected
    failure rather than erroring out.

    Returns
    -------
    Bunch
        ``data`` holds columns 0-12 and ``target`` holds column 13, both
        converted to ``float64``.
    """
    try:
        df = get_boston_data()
    except Exception:
        # Not a bare ``except``: KeyboardInterrupt / SystemExit must still
        # propagate instead of being turned into an xfail.
        pytest.xfail(reason="Error fetching Boston housing dataset")

    # The first cell of the raw file is interpreted as the sample count.
    n_samples = int(df[0][0])
    # Rows 0 and 1 are skipped (presumably the count row and the column-name
    # row -- confirm against the CSV).
    # NOTE(review): the slice stops at row index ``n_samples``; if two
    # leading rows are metadata this leaves out the last two samples --
    # confirm whether that is intended.
    data = df[list(np.arange(13))].values[2:n_samples].astype(np.float64)
    targets = df[13].values[2:n_samples].astype(np.float64)

    return Bunch(
        data=data,
        target=targets,
    )


@pytest.fixture(
scope="session",
params=["digits", "deprecated_boston_dataset", "diabetes", "cancer"],
scope="module",
params=["digits", "housing_dataset", "diabetes", "cancer"],
)
def test_datasets(request, deprecated_boston_dataset):
def test_datasets(request, housing_dataset):
X, y, _ = housing_dataset
housing_dataset = Bunch(data=X.get(), target=y.get())

test_datasets_dict = {
"digits": datasets.load_digits(),
"deprecated_boston_dataset": deprecated_boston_dataset,
"housing_dataset": housing_dataset,
"diabetes": datasets.load_diabetes(),
"cancer": datasets.load_breast_cancer(),
}
Expand Down

0 comments on commit 0112051

Please sign in to comment.