Adapt tests from modin-project#4724
Co-authored-by: mvashishtha <[email protected]>
Signed-off-by: Vasily Litvinov <[email protected]>
vnlitvinov and mvashishtha committed Aug 4, 2023
1 parent 665d6d0 commit 1d0b19a
Showing 1 changed file with 60 additions and 7 deletions.
67 changes: 60 additions & 7 deletions modin/pandas/test/test_io.py
@@ -93,13 +93,6 @@

from modin.config import NPartitions

# Our configuration in pytest.ini requires that we explicitly catch all
# instances of defaulting to pandas, but some test modules, like this one,
# have too many such instances.
# TODO(https://github.com/modin-project/modin/issues/3655): catch all instances
# of defaulting to pandas.
pytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)

NPartitions.put(4)

DATASET_SIZE_DICT = {
@@ -269,6 +262,7 @@ def _make_parquet_dir(
IsExperimental.get() and StorageFormat.get() == "Pyarrow",
reason="Segmentation fault; see PR #2347 ffor details",
)
@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestCsv:
# delimiter tests
@pytest.mark.parametrize("sep", [None, "_", ",", ".", "\n"])
@@ -1307,6 +1301,24 @@ def test_read_csv_1930(self, usecols):
)


# Leave this test apart from the test classes, which skip the default to pandas
# warning check. We want to make sure we are NOT defaulting to pandas for a
# path relative to user home.
# TODO(https://github.com/modin-project/modin/issues/3655): Get rid of this
# comment once we turn all default to pandas messages into errors.
def test_read_csv_relative_to_user_home(make_csv_file):
    with ensure_clean(".csv") as unique_filename:
        make_csv_file(filename=unique_filename)

        with mock.patch.dict(os.environ, {"HOME": os.path.dirname(unique_filename)}):
            with warns_that_defaulting_to_pandas() if Engine.get() == "Python" else _nullcontext():
                eval_io(
                    fn_name="read_csv",
                    filepath_or_buffer=f"~/{os.path.basename(unique_filename)}",
                )


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestTable:
def test_read_table(self, make_csv_file):
with ensure_clean() as unique_filename:
@@ -1358,6 +1370,7 @@ def test_read_table_empty_frame(self, make_csv_file):


@pytest.mark.parametrize("engine", ["pyarrow", "fastparquet"])
@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestParquet:
@pytest.mark.parametrize("columns", [None, ["col1"]])
@pytest.mark.parametrize("row_group_size", [None, 100, 1000, 10_000])
@@ -1729,6 +1742,24 @@ def test_read_parquet_s3_with_column_partitioning(self, engine):
)


# Leave this test apart from the test classes, which skip the default to pandas
# warning check. We want to make sure we are NOT defaulting to pandas for a
# path relative to user home.
# TODO(https://github.com/modin-project/modin/issues/3655): Get rid of this
# comment once we turn all default to pandas messages into errors.
def test_read_parquet_relative_to_user_home(make_parquet_file):
    with ensure_clean(".parquet") as unique_filename:
        make_parquet_file(filename=unique_filename)

        with mock.patch.dict(os.environ, {"HOME": os.path.dirname(unique_filename)}):
            with warns_that_defaulting_to_pandas() if Engine.get() == "Python" else _nullcontext():
                eval_io(
                    fn_name="read_parquet",
                    path=f"~/{os.path.basename(unique_filename)}",
                )


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestJson:
@pytest.mark.parametrize("lines", [False, True])
def test_read_json(self, make_json_file, lines):
@@ -1838,6 +1869,7 @@ def test_read_json_metadata(self, make_json_file):
assert parts_width_cached == parts_width_actual


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestExcel:
@check_file_leaks
def test_read_excel(self, make_excel_file):
@@ -2018,6 +2050,7 @@ def test_read_excel_empty_frame(self, make_excel_file):
)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestHdf:
@pytest.mark.parametrize("format", [None, "table"])
def test_read_hdf(self, make_hdf_file, format):
@@ -2072,6 +2105,7 @@ def test_HDFStore_in_read_hdf(self):
df_equals(modin_df, pandas_df)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestSql:
@pytest.mark.parametrize("read_sql_engine", ["Pandas", "Connectorx"])
def test_read_sql(self, tmp_path, make_sql_connection, read_sql_engine):
@@ -2246,6 +2280,7 @@ def test_to_sql(self, tmp_path, make_sql_connection, index, conn_type):
assert df_modin_sql.sort_index().equals(df_pandas_sql.sort_index())


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestHtml:
def test_read_html(self, make_html_file):
eval_io(fn_name="read_html", io=make_html_file())
@@ -2262,6 +2297,7 @@ def test_to_html(self, tmp_path):
)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestFwf:
def test_fwf_file(self, make_fwf_file):
fwf_data = (
@@ -2489,6 +2525,7 @@ def test_read_fwf_s3(self, storage_options):
)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestGbq:
@pytest.mark.skip(reason="Can not pass without GBQ access")
def test_read_gbq(self):
@@ -2519,6 +2556,7 @@ def test_read_gbq_mock(self):
read_gbq.assert_called_once_with(*test_args, **test_kwargs)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestStata:
def test_read_stata(self, make_stata_file):
eval_io(
@@ -2538,6 +2576,7 @@ def test_to_stata(self, tmp_path):
)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestSas:
def test_read_sas(self):
eval_io(
@@ -2547,6 +2586,7 @@ def test_read_sas(self):
)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestFeather:
def test_read_feather(self, make_feather_file):
eval_io(
@@ -2611,6 +2651,7 @@ def test_read_feather_with_index_metadata(self, tmp_path):
)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestClipboard:
@pytest.mark.skip(reason="No clipboard in CI")
def test_read_clipboard(self):
@@ -2631,6 +2672,7 @@ def test_to_clipboard(self):
assert modin_as_clip.equals(pandas_as_clip)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestPickle:
def test_read_pickle(self, make_pickle_file):
eval_io(
@@ -2650,6 +2692,7 @@ def test_to_pickle(self, tmp_path):
)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestXml:
def test_read_xml(self):
# example from pandas
@@ -2669,6 +2712,7 @@ def test_read_xml(self):
eval_io("read_xml", path_or_buffer=data)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestOrc:
# It's not easy to add infrastructure for `orc` format.
# In case of defaulting to pandas, it's enough
@@ -2687,6 +2731,7 @@ def test_read_orc(self):
read_orc.assert_called_once_with(*test_args, **test_kwargs)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
class TestSpss:
# It's not easy to add infrastructure for `spss` format.
# In case of defaulting to pandas, it's enough
@@ -2703,6 +2748,7 @@ def test_read_spss(self):
read_spss.assert_called_once_with(*test_args, **test_kwargs)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_json_normalize():
# example from pandas
data = [
@@ -2713,12 +2759,14 @@ def test_json_normalize():
eval_io("json_normalize", data=data)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_from_arrow():
_, pandas_df = create_test_dfs(TEST_DATA)
modin_df = from_arrow(pa.Table.from_pandas(pandas_df))
df_equals(modin_df, pandas_df)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_from_spmatrix():
data = sparse.eye(3)
with pytest.warns(UserWarning, match="defaulting to pandas.*"):
@@ -2727,12 +2775,14 @@ def test_from_spmatrix():
df_equals(modin_df, pandas_df)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_to_dense():
data = {"col1": pandas.arrays.SparseArray([0, 1, 0])}
modin_df, pandas_df = create_test_dfs(data)
df_equals(modin_df.sparse.to_dense(), pandas_df.sparse.to_dense())


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_to_dict_dataframe():
modin_df, _ = create_test_dfs(TEST_DATA)
assert modin_df.to_dict() == to_pandas(modin_df).to_dict()
@@ -2747,6 +2797,7 @@ def test_to_dict_dataframe():
pytest.param({"into": defaultdict(list)}, id="into_defaultdict"),
],
)
@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_to_dict_series(kwargs):
eval_general(
*[df.iloc[:, 0] for df in create_test_dfs(utils_test_data["int_data"])],
@@ -2757,11 +2808,13 @@ def test_to_dict_series(kwargs):
)


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_to_latex():
modin_df, _ = create_test_dfs(TEST_DATA)
assert modin_df.to_latex() == to_pandas(modin_df).to_latex()


@pytest.mark.filterwarnings(default_to_pandas_ignore_string)
def test_to_period():
index = pandas.DatetimeIndex(
pandas.date_range("2000", freq="h", periods=len(TEST_DATA["col1"]))

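For context, the pattern this commit applies, shown as a minimal generic pytest sketch rather than Modin code: instead of a module-wide pytestmark that silences a warning for every test in the file, the filterwarnings mark is attached to individual classes and functions, so tests left unmarked (such as the new relative-to-user-home tests above) still surface the warning. The filter string and names below are made up for illustration; the real constant, default_to_pandas_ignore_string, lives in Modin's test utilities.

```python
import warnings

import pytest

# Placeholder filter string, standing in for Modin's
# default_to_pandas_ignore_string; not the real value.
IGNORE_NOISY_WARNING = "ignore:.*noisy operation.*:UserWarning"


def noisy_reader():
    # Stand-in for an operation that, like Modin's default-to-pandas
    # fallback, emits a UserWarning.
    warnings.warn("noisy operation", UserWarning)
    return 42


@pytest.mark.filterwarnings(IGNORE_NOISY_WARNING)
class TestMarked:
    # The warning filter applies only to tests inside this class.
    def test_reader(self):
        assert noisy_reader() == 42


def test_unmarked():
    # Outside the marked class the warning is still raised, so it can be
    # asserted on here (or escalated to an error via pytest.ini filterwarnings).
    with pytest.warns(UserWarning, match="noisy operation"):
        assert noisy_reader() == 42
```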