[#160] Remove the custom pytest.mark.quickcheck
The built-in pytest filtering functionality has been more helpful than this
custom marker. I added some documentation to doc/developer.md which points to
the pytest docs for filtering.
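For illustration, a couple of equivalent ways to run a quick subset with pytest's built-in selection (these exact commands are a sketch, not part of the commit):

```
# Select tests by keyword; -k accepts boolean expressions matched against test and module names
pytest -k "config_loader or pipeline"

# Or run a single test module directly
pytest hlink/tests/config_loader_test.py
```

Either form replaces the old `pytest -m quickcheck` invocation without needing a registered marker.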
riley-harper committed Nov 21, 2024
1 parent 53b0ad3 commit 35a7810
Showing 10 changed files with 17 additions and 27 deletions.
20 changes: 17 additions & 3 deletions doc/developer.md
@@ -19,9 +19,23 @@ To set up a copy of this project for development,

## Running Tests

To run the project's test suite, run `pytest` in the root project directory. Running all of the tests
can take a while, depending on your computer's hardware and setup. To run a subset of tests that test some but not
all of the core features, try `pytest -m quickcheck`. These tests should run much more quickly.
To run the project's test suite, run `pytest` in the root project directory.
Running all of the tests can take a while, depending on your computer's
hardware and setup. If you are working on a particular bug or feature, there
are several good ways to filter the tests to run just tests that interest you.
Check out the pytest documentation
[here](https://docs.pytest.org/en/latest/how-to/usage.html#specifying-which-tests-to-run).

In particular, the `-k` argument is helpful for running only tests with names
that match the topics you are interested in, like this:

```
pytest -k "lightgbm or xgboost"
```
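Selection by file path or node ID works the same way; for example (illustrative commands using test files from this repository, not text from the committed documentation):

```
# Run one test module
pytest hlink/tests/table_test.py

# Run a single test function by its node ID
pytest hlink/tests/table_test.py::test_drop_table_does_not_exist
```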

The GitHub Actions workflow runs all of the tests on each push or PR to the
main branch. It runs the tests on several versions of Python and in several
different Python environments.

## Building the Scala Jar

4 changes: 0 additions & 4 deletions hlink/tests/config_loader_test.py
@@ -5,24 +5,20 @@

from hlink.configs.load_config import load_conf_file
import os.path
import pytest


@pytest.mark.quickcheck
def test_load_conf_file_json(conf_dir_path):
conf_file = os.path.join(conf_dir_path, "test")
conf = load_conf_file(conf_file)
assert conf["id_column"] == "id"


@pytest.mark.quickcheck
def test_load_conf_file_toml(conf_dir_path):
conf_file = os.path.join(conf_dir_path, "test1")
conf = load_conf_file(conf_file)
assert conf["id_column"] == "id-toml"


@pytest.mark.quickcheck
def test_load_conf_file_json2(conf_dir_path):
conf_file = os.path.join(conf_dir_path, "test_conf_flag_run")
conf = load_conf_file(conf_file)
2 changes: 0 additions & 2 deletions hlink/tests/core/pipeline_test.py
@@ -1,8 +1,6 @@
import pytest
import hlink.linking.core.pipeline as pipeline_core


@pytest.mark.quickcheck
def test_categorical_comparison_features():
"""Catches a bug where comparison features marked as categorical = false
were still included as categorical. See Issue #81.
3 changes: 0 additions & 3 deletions hlink/tests/main_loop_test.py
@@ -5,12 +5,10 @@

import os
import pandas as pd
import pytest
from pyspark.ml.feature import VectorAssembler, OneHotEncoder
from hlink.linking.link_run import link_task_choices


@pytest.mark.quickcheck
def test_do_get_steps(capsys, main, spark):
for task in link_task_choices:
task_inst = getattr(main.link_run, task)
@@ -22,7 +20,6 @@ def test_do_get_steps(capsys, main, spark):
assert str(step) in output


@pytest.mark.quickcheck
def test_do_set_link_task(capsys, main):
main.current_link_task = main.link_run.matching
main.do_set_link_task("preprocessing")
4 changes: 0 additions & 4 deletions hlink/tests/main_test.py
@@ -59,7 +59,6 @@ def test_load_conf_does_not_exist_no_env(monkeypatch, tmp_path, conf_file, user)
load_conf(filename, user)


@pytest.mark.quickcheck
@pytest.mark.parametrize("conf_file", ("my_conf.json",))
@pytest.mark.parametrize("user", users)
def test_load_conf_json_exists_no_env(monkeypatch, tmp_path, conf_file, user):
@@ -90,7 +89,6 @@ def test_load_conf_json_exists_ext_added_no_env(monkeypatch, tmp_path, conf_name
assert conf["conf_path"] == filename


@pytest.mark.quickcheck
@pytest.mark.parametrize("conf_file", ("my_conf.toml",))
@pytest.mark.parametrize("user", users)
def test_load_conf_toml_exists_no_env(monkeypatch, tmp_path, conf_file, user):
@@ -189,7 +187,6 @@ def test_load_conf_does_not_exist_env(
load_conf(conf_file, user)


@pytest.mark.quickcheck
@pytest.mark.parametrize("conf_file", ("my_conf.json",))
@pytest.mark.parametrize("user", users)
def test_load_conf_json_exists_in_conf_dir_env(
@@ -209,7 +206,6 @@ def test_load_conf_json_exists_in_conf_dir_env(
assert conf["conf_path"] == str(file)


@pytest.mark.quickcheck
@pytest.mark.parametrize("conf_file", ("my_conf.toml",))
@pytest.mark.parametrize("user", users)
def test_load_conf_toml_exists_in_conf_dir_env(
2 changes: 0 additions & 2 deletions hlink/tests/matching_blocking_explode_test.py
@@ -4,13 +4,11 @@
# https://github.com/ipums/hlink

from pyspark.sql import Row
import pytest
import pandas as pd
from hlink.linking.matching.link_step_match import extract_or_groups_from_blocking
from hlink.linking.matching.link_step_score import LinkStepScore


@pytest.mark.quickcheck
def test_steps_1_2_matching(
spark, blocking_explode_conf, matching_test_input, matching, main
):
1 change: 0 additions & 1 deletion hlink/tests/preprocessing_test.py
@@ -10,7 +10,6 @@
from hlink.errors import DataError


@pytest.mark.quickcheck
def test_step_0(preprocessing, spark, preprocessing_conf):
"""Test preprocessing step 0 to ensure that temporary raw_df_unpartitioned_(a/b) tables are created (exact copies of datasources from config). Also test that the presistent raw_df_(a/b) tables are created. Should be same as raw datasources with filters applied"""

4 changes: 0 additions & 4 deletions hlink/tests/table_test.py
@@ -8,14 +8,12 @@ def simple_schema():
return StructType([StructField("test", StringType())])


@pytest.mark.quickcheck
@pytest.mark.parametrize("table_name", ["this_table_does_not_exist", "@@@", "LOL rofl"])
def test_exists_table_does_not_exist(spark, table_name):
t = Table(spark, table_name, "table used for testing")
assert not t.exists()


@pytest.mark.quickcheck
@pytest.mark.parametrize("table_name", ["table_for_testing_Table_class"])
def test_exists_table_does_exist(spark, table_name, simple_schema):
t = Table(spark, table_name, "table used for testing")
@@ -25,7 +23,6 @@ def test_exists_table_does_exist(spark, table_name, simple_schema):
spark.sql(f"DROP TABLE {table_name}")


@pytest.mark.quickcheck
@pytest.mark.parametrize("table_name", ["table_for_testing_Table_class"])
def test_drop_table_does_exist(spark, table_name, simple_schema):
t = Table(spark, table_name, "table used for testing")
@@ -45,7 +42,6 @@ def test_drop_table_does_not_exist(spark, table_name):
assert not t.exists()


@pytest.mark.quickcheck
@pytest.mark.parametrize("table_name", ["table_for_testing_Table_class"])
def test_df_table_does_exist(spark, table_name, simple_schema):
t = Table(spark, table_name, "table used for testing")
1 change: 0 additions & 1 deletion hlink/tests/training_test.py
@@ -8,7 +8,6 @@
import hlink.linking.core.pipeline as pipeline_core


@pytest.mark.quickcheck
def test_all_steps(
spark,
training_conf,
3 changes: 0 additions & 3 deletions pytest.ini

This file was deleted.
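The deleted file's contents are not shown in this view. For context, a pytest.ini that registers a custom marker such as `quickcheck` typically looks something like the sketch below; this is illustrative only, not the actual deleted file:

```
[pytest]
markers =
    quickcheck: a small, fast subset of the test suite
```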
