[#160] Remove the custom pytest.mark.quickcheck
The built-in pytest filtering functionality has been more helpful than this
custom marker. I added some documentation to doc/developer.md which points to
the pytest docs for filtering.
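For illustration, a couple of equivalent ways to run a quick subset with pytest's built-in selection (these exact commands are a sketch, not part of the commit):

```
# Select tests by keyword; -k accepts boolean expressions matched against test and module names
pytest -k "config_loader or pipeline"

# Or run a single test module directly
pytest hlink/tests/config_loader_test.py
```

Either form replaces the old `pytest -m quickcheck` invocation without needing a registered marker.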
riley-harper committed Nov 21, 2024
1 parent 53b0ad3 commit 35a7810
Showing 10 changed files with 17 additions and 27 deletions.
20 changes: 17 additions & 3 deletions doc/developer.md
@@ -19,9 +19,23 @@ To set up a copy of this project for development,

## Running Tests

To run the project's test suite, run `pytest` in the root project directory. Running all of the tests
can take a while, depending on your computer's hardware and setup. To run a subset of tests that test some but not
all of the core features, try `pytest -m quickcheck`. These tests should run much more quickly.
To run the project's test suite, run `pytest` in the root project directory.
Running all of the tests can take a while, depending on your computer's
hardware and setup. If you are working on a particular bug or feature, there
are several good ways to filter the tests to run just tests that interest you.
Check out the pytest documentation
[here](https://docs.pytest.org/en/latest/how-to/usage.html#specifying-which-tests-to-run).

In particular, the `-k` argument is helpful for running only tests with names
that match the topics you are interested in, like this:

```
pytest -k "lightgbm or xgboost"
```
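Selection by file path or node ID works the same way; for example (illustrative commands using test files from this repository, not text from the committed documentation):

```
# Run one test module
pytest hlink/tests/table_test.py

# Run a single test function by its node ID
pytest hlink/tests/table_test.py::test_drop_table_does_not_exist
```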

The GitHub Actions workflow runs all of the tests on each push or PR to the
main branch. It runs the tests on several versions of Python and in several
different Python environments.

## Building the Scala Jar

4 changes: 0 additions & 4 deletions hlink/tests/config_loader_test.py
@@ -5,24 +5,20 @@

from hlink.configs.load_config import load_conf_file
import os.path
import pytest


@pytest.mark.quickcheck
def test_load_conf_file_json(conf_dir_path):
conf_file = os.path.join(conf_dir_path, "test")
conf = load_conf_file(conf_file)
assert conf["id_column"] == "id"


@pytest.mark.quickcheck
def test_load_conf_file_toml(conf_dir_path):
conf_file = os.path.join(conf_dir_path, "test1")
conf = load_conf_file(conf_file)
assert conf["id_column"] == "id-toml"


@pytest.mark.quickcheck
def test_load_conf_file_json2(conf_dir_path):
conf_file = os.path.join(conf_dir_path, "test_conf_flag_run")
conf = load_conf_file(conf_file)
2 changes: 0 additions & 2 deletions hlink/tests/core/pipeline_test.py
@@ -1,8 +1,6 @@
import pytest
import hlink.linking.core.pipeline as pipeline_core


@pytest.mark.quickcheck
def test_categorical_comparison_features():
"""Catches a bug where comparison features marked as categorical = false
were still included as categorical. See Issue #81.
3 changes: 0 additions & 3 deletions hlink/tests/main_loop_test.py
@@ -5,12 +5,10 @@

import os
import pandas as pd
import pytest
from pyspark.ml.feature import VectorAssembler, OneHotEncoder
from hlink.linking.link_run import link_task_choices


@pytest.mark.quickcheck
def test_do_get_steps(capsys, main, spark):
for task in link_task_choices:
task_inst = getattr(main.link_run, task)
@@ -22,7 +20,6 @@ def test_do_get_steps(capsys, main, spark):
assert str(step) in output


@pytest.mark.quickcheck
def test_do_set_link_task(capsys, main):
main.current_link_task = main.link_run.matching
main.do_set_link_task("preprocessing")
4 changes: 0 additions & 4 deletions hlink/tests/main_test.py
@@ -59,7 +59,6 @@ def test_load_conf_does_not_exist_no_env(monkeypatch, tmp_path, conf_file, user)
load_conf(filename, user)


@pytest.mark.quickcheck
@pytest.mark.parametrize("conf_file", ("my_conf.json",))
@pytest.mark.parametrize("user", users)
def test_load_conf_json_exists_no_env(monkeypatch, tmp_path, conf_file, user):
@@ -90,7 +89,6 @@ def test_load_conf_json_exists_ext_added_no_env(monkeypatch, tmp_path, conf_name
assert conf["conf_path"] == filename


@pytest.mark.quickcheck
@pytest.mark.parametrize("conf_file", ("my_conf.toml",))
@pytest.mark.parametrize("user", users)
def test_load_conf_toml_exists_no_env(monkeypatch, tmp_path, conf_file, user):
@@ -189,7 +187,6 @@ def test_load_conf_does_not_exist_env(
load_conf(conf_file, user)


@pytest.mark.quickcheck
@pytest.mark.parametrize("conf_file", ("my_conf.json",))
@pytest.mark.parametrize("user", users)
def test_load_conf_json_exists_in_conf_dir_env(
@@ -209,7 +206,6 @@ def test_load_conf_json_exists_in_conf_dir_env(
assert conf["conf_path"] == str(file)


@pytest.mark.quickcheck
@pytest.mark.parametrize("conf_file", ("my_conf.toml",))
@pytest.mark.parametrize("user", users)
def test_load_conf_toml_exists_in_conf_dir_env(
2 changes: 0 additions & 2 deletions hlink/tests/matching_blocking_explode_test.py
@@ -4,13 +4,11 @@
# https://github.com/ipums/hlink

from pyspark.sql import Row
import pytest
import pandas as pd
from hlink.linking.matching.link_step_match import extract_or_groups_from_blocking
from hlink.linking.matching.link_step_score import LinkStepScore


@pytest.mark.quickcheck
def test_steps_1_2_matching(
spark, blocking_explode_conf, matching_test_input, matching, main
):
1 change: 0 additions & 1 deletion hlink/tests/preprocessing_test.py
@@ -10,7 +10,6 @@
from hlink.errors import DataError


@pytest.mark.quickcheck
def test_step_0(preprocessing, spark, preprocessing_conf):
"""Test preprocessing step 0 to ensure that temporary raw_df_unpartitioned_(a/b) tables are created (exact copies of datasources from config). Also test that the presistent raw_df_(a/b) tables are created. Should be same as raw datasources with filters applied"""

4 changes: 0 additions & 4 deletions hlink/tests/table_test.py
@@ -8,14 +8,12 @@ def simple_schema():
return StructType([StructField("test", StringType())])


@pytest.mark.quickcheck
@pytest.mark.parametrize("table_name", ["this_table_does_not_exist", "@@@", "LOL rofl"])
def test_exists_table_does_not_exist(spark, table_name):
t = Table(spark, table_name, "table used for testing")
assert not t.exists()


@pytest.mark.quickcheck
@pytest.mark.parametrize("table_name", ["table_for_testing_Table_class"])
def test_exists_table_does_exist(spark, table_name, simple_schema):
t = Table(spark, table_name, "table used for testing")
@@ -25,7 +23,6 @@ def test_exists_table_does_exist(spark, table_name, simple_schema):
spark.sql(f"DROP TABLE {table_name}")


@pytest.mark.quickcheck
@pytest.mark.parametrize("table_name", ["table_for_testing_Table_class"])
def test_drop_table_does_exist(spark, table_name, simple_schema):
t = Table(spark, table_name, "table used for testing")
@@ -45,7 +42,6 @@ def test_drop_table_does_not_exist(spark, table_name):
assert not t.exists()


@pytest.mark.quickcheck
@pytest.mark.parametrize("table_name", ["table_for_testing_Table_class"])
def test_df_table_does_exist(spark, table_name, simple_schema):
t = Table(spark, table_name, "table used for testing")
1 change: 0 additions & 1 deletion hlink/tests/training_test.py
@@ -8,7 +8,6 @@
import hlink.linking.core.pipeline as pipeline_core


@pytest.mark.quickcheck
def test_all_steps(
spark,
training_conf,
3 changes: 0 additions & 3 deletions pytest.ini

This file was deleted.
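The deleted file's contents are not shown in this view. For context, a pytest.ini that registers a custom marker such as `quickcheck` typically looks something like the sketch below; this is illustrative only, not the actual deleted file:

```
[pytest]
markers =
    quickcheck: a small, fast subset of the test suite
```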
