diff --git a/hlink/tests/input_data/potential_matches_agg.csv b/hlink/tests/input_data/potential_matches_agg.csv
index 1416be9..95f836f 100644
--- a/hlink/tests/input_data/potential_matches_agg.csv
+++ b/hlink/tests/input_data/potential_matches_agg.csv
@@ -1,31 +1,31 @@
-﻿namelast_clean_a,namelast_clean_b,histid_a,histid_b,bpl_a,bpl_b,namefrst_unstd_a,namefrst_unstd_b,sex_a,sex_b,namefrst_jw,namelast_jw,regionf,state_distance,exact,exact_all
-cridlebaugh,cridlebaugh,0202928A-AC3E-48BB-8568-3372067F35C7,002B8A74-3795-4997-BC5B-2A07257668F9,3100,3100,gerald,gerald,1,1,1.0,1.0,4,0,1.00,1.00
-cridlebaugh,cridlebaugh,0202928A-AC3E-48BB-8568-3372067F35C7,003B8A74-3795-4997-BC5B-2A07257668F9,3100,3100,gerald,gerald,1,1,1.0,1.0,4,0,1.00,1.00
-cridlebaugh,cridlebaugh,0202928A-AC3E-48BB-8568-3372067F35C7,001B8A74-3795-4997-BC5B-2A07257668F9,3100,3100,gerald,gerald,1,1,1.0,1.0,4,0,1.00,1.00
-symonds,symonds,1E5D9C44-3D8E-40F8-A843-8E7619CF7B94,00427A22-FF1E-400A-9A8A-1752A60BE7CF,3600,3600,horace,horace,1,1,1.0,1.0,2,0,1.00,1.00
-symonds,symonds,1E5D9C44-3D8E-40F8-A843-8E7619CF7B94,00327A22-FF1E-400A-9A8A-1752A60BE7CF,3600,3600,horace,horace,1,1,1.0,1.0,2,0,1.00,1.00
-abrahams,abrahams,095AD921-9B08-468E-817A-44879FBCADDE,01620FE0-E907-47F4-9368-5B14EBF69BE4,60094,60094,isiah,isniah,1,1,0.9555555555555556,1.0,99,0,0.00,0.00
-abrahams,abrahams,095AD921-9B08-468E-817A-44879FBCADDE,02620FE0-E907-47F4-9368-5B14EBF69BE4,60094,60094,isiah,isniah,1,1,0.9555555555555556,1.0,99,0,0.00,0.00
-abrahams,abrahams,095AD921-9B08-468E-817A-44879FBCADDE,03620FE0-E907-47F4-9368-5B14EBF69BE4,60094,60094,isiah,isniah,1,1,0.9555555555555556,1.0,99,0,0.00,0.00
-abrahams,abrahams,095AD921-9B08-468E-817A-44879FBCADDE,00620FE0-E907-47F4-9368-5B14EBF69BE4,60094,60094,isiah,isniah,1,1,0.9555555555555556,1.0,99,0,0.00,0.00
-eilbatt,eilbott,6F6D3D9A-A2C4-471C-A19A-5EFE90AAA5C7,00669345-C937-4405-A0F0-1FCA5204DF64,4700,4700,reginald,reginald,1,1,1.0,0.9428571428571428,6,0,1.00,0.00
-knopke,knopke,EAD03D68-F21D-4A74-8C16-F9123F5288D7,007EDAE5-BDEF-4819-969E-7DAA8DC3FDD6,2100,2100,andrew,andrew,1,1,1.0,1.0,6,0,1.00,1.00
-caldwell,caldwell,AF3C7686-98EF-46F5-B5DF-DE8CC50A93DC,00849961-E52F-42F2-9B70-052606223052,15010,15010,daisy,daisy,2,2,1.0,1.0,99,0,1.00,1.00
-sonnenschein,sonnenschein,8A50FA06-BAF8-4EC5-9726-2EB3551CD6D1,00C4291F-7064-4A81-8589-5854C367EEC4,1700,1700,max,max,1,1,1.0,1.0,3,0,1.00,1.00
-gibson,gebson,F2798AB4-3217-4D0A-A6A0-6B390A3C4B7A,010F244F-94D0-4295-82DB-0E172724358A,5500,5500,dwight,dwight,1,1,1.0,0.9,3,0,1.00,0.00
-hegewald,hegewald,D30C40B9-2E7C-4933-84CE-CEAAB37E3209,01230024-F3C6-4D4A-86DF-F9EF29F77292,5600,5600,karl,karl,1,1,1.0,1.0,8,0,1.00,1.00
-king,king,CCBA170F-93D0-42C3-A57B-CCABBF2772FB,0141C5C9-32DC-469E-862C-A6CCD8A2EB2B,3800,3800,virgel,virgil,1,1,0.9333333333333333,1.0,4,0,0.00,0.00
-looney,looney,4F29F4B1-F953-4FC8-A7FB-42F54FB51E73,016EF43B-E70F-440E-882E-E447663F682F,4700,4700,sadie,sadye,2,2,0.9066666666666667,1.0,6,0,0.00,0.00
-rydstrom,rydstrom,CC7B3BF9-AEE5-4ECB-9F5D-9F910346B9CD,018C7B94-4387-4149-9B2D-CA7BB18AA559,1700,1700,hubert,hubert,1,1,1.0,1.0,3,0,1.00,1.00
-mugrdickian,mugrdichian,2CC7B61B-6821-4B2B-A283-8FE8D557D6F6,019D26A0-0335-48B5-A6D6-1D499424BE84,3600,3600,misak,misak,1,1,1.0,0.977961432506887,2,0,1.00,0.00
-brightman,brightman,195EA695-D047-4045-8757-E7A22F12E148,0269E114-0EDD-4767-AE9F-B6557CD880EE,3900,3900,austin,anstin,1,1,0.9,1.0,3,0,0.00,0.00
-harman,harman,74941094-9737-40F0-BF3C-0C2380B08040,0282109F-581C-4B8E-A99D-135CF0077C2E,5400,5400,eston,estan,1,1,0.9066666666666667,1.0,5,0,0.00,0.00
-oglesby,oglesby,F0F34E2F-49CC-4F06-8CC4-691CF3150244,02A06F96-AAD4-4EE2-B20B-CD1A4ED33D46,4000,4000,stephen,stephen,1,1,1.0,1.0,7,0,1.00,1.00
-kassik,kassek,6EB222E3-EB8F-4E20-BCE0-2C12F926ABB1,033FD0FA-C523-42B5-976A-751E830F7021,5600,5600,james,james,1,1,1.0,0.9333333333333333,8,0,1.00,0.00
-wood,wood,EE22ED8E-9544-4C77-A689-75895376E3EB,0350987D-D6B3-4519-A7C0-96C5B45111D6,1700,1700,dudley,dudley,1,1,1.0,1.0,3,0,1.00,1.00
-foulkrod,foulkrod,47DB90F0-6A7B-421F-9B18-CAB1CFA45E71,03522784-6097-4A7A-A54E-C6AA8E22BF20,4200,4200,s,s,1,1,1.0,1.0,2,0,1.00,1.00
-huges,hughes,7E20FBBE-9B4B-4FAB-9433-CB77D9E6B022,038F08DA-12C8-4AF2-B5DD-43BB2A58DAA1,100,100,keneth,kenneth,1,1,0.9666666666666667,0.9611111111111111,6,0,0.00,0.00
-caldwell,caldwell,A859D9BC-6106-43A2-8A47-B12D9D2C49C8,039AE50E-84E7-49A7-B720-48D2C765C5D5,5000,5000,nathan,nathan,1,1,1.0,1.0,1,0,1.00,1.00
-platta,platts,E19E5381-C68D-4E03-A688-597DF13311CE,03B89FD5-872A-4504-9758-F5AA1607BA01,1200,1200,norman,norman,1,1,1.0,0.9444444444444444,5,0,1.00,0.00
-lipscomb,lipscomb,671DE512-479B-4EEB-85B4-93A848E6BDD7,03DD4EB7-0FA7-4AA4-A510-79448E316A43,1300,1300,roy,roy,1,1,1.0,1.0,5,0,1.00,1.00
-woodburne,woodburn,81E992C0-3796-4BE7-B02E-9CAD0289C6EC,03FFD04A-DC09-47EC-84EF-A0DD3E9C0528,200,200,walter,walter,1,1,1.0,0.9925925925925926,9,0,1.00,0.00
\ No newline at end of file
+histid_a,histid_b,namefrst_jw,namelast_jw,regionf,state_distance
+0202928A-AC3E-48BB-8568-3372067F35C7,002B8A74-3795-4997-BC5B-2A07257668F9,1.0,1.0,4,0
+0202928A-AC3E-48BB-8568-3372067F35C7,003B8A74-3795-4997-BC5B-2A07257668F9,1.0,1.0,4,0
+0202928A-AC3E-48BB-8568-3372067F35C7,001B8A74-3795-4997-BC5B-2A07257668F9,1.0,1.0,4,0
+1E5D9C44-3D8E-40F8-A843-8E7619CF7B94,00427A22-FF1E-400A-9A8A-1752A60BE7CF,1.0,1.0,2,0
+1E5D9C44-3D8E-40F8-A843-8E7619CF7B94,00327A22-FF1E-400A-9A8A-1752A60BE7CF,1.0,1.0,2,0
+095AD921-9B08-468E-817A-44879FBCADDE,01620FE0-E907-47F4-9368-5B14EBF69BE4,0.9555555555555556,1.0,99,0
+095AD921-9B08-468E-817A-44879FBCADDE,02620FE0-E907-47F4-9368-5B14EBF69BE4,0.9555555555555556,1.0,99,0
+095AD921-9B08-468E-817A-44879FBCADDE,03620FE0-E907-47F4-9368-5B14EBF69BE4,0.9555555555555556,1.0,99,0
+095AD921-9B08-468E-817A-44879FBCADDE,00620FE0-E907-47F4-9368-5B14EBF69BE4,0.9555555555555556,1.0,99,0
+6F6D3D9A-A2C4-471C-A19A-5EFE90AAA5C7,00669345-C937-4405-A0F0-1FCA5204DF64,1.0,0.9428571428571428,6,0
+EAD03D68-F21D-4A74-8C16-F9123F5288D7,007EDAE5-BDEF-4819-969E-7DAA8DC3FDD6,1.0,1.0,6,0
+AF3C7686-98EF-46F5-B5DF-DE8CC50A93DC,00849961-E52F-42F2-9B70-052606223052,1.0,1.0,99,0
+8A50FA06-BAF8-4EC5-9726-2EB3551CD6D1,00C4291F-7064-4A81-8589-5854C367EEC4,1.0,1.0,3,0
+F2798AB4-3217-4D0A-A6A0-6B390A3C4B7A,010F244F-94D0-4295-82DB-0E172724358A,1.0,0.9,3,0
+D30C40B9-2E7C-4933-84CE-CEAAB37E3209,01230024-F3C6-4D4A-86DF-F9EF29F77292,1.0,1.0,8,0
+CCBA170F-93D0-42C3-A57B-CCABBF2772FB,0141C5C9-32DC-469E-862C-A6CCD8A2EB2B,0.9333333333333333,1.0,4,0
+4F29F4B1-F953-4FC8-A7FB-42F54FB51E73,016EF43B-E70F-440E-882E-E447663F682F,0.9066666666666667,1.0,6,0
+CC7B3BF9-AEE5-4ECB-9F5D-9F910346B9CD,018C7B94-4387-4149-9B2D-CA7BB18AA559,1.0,1.0,3,0
+2CC7B61B-6821-4B2B-A283-8FE8D557D6F6,019D26A0-0335-48B5-A6D6-1D499424BE84,1.0,0.977961432506887,2,0
+195EA695-D047-4045-8757-E7A22F12E148,0269E114-0EDD-4767-AE9F-B6557CD880EE,0.9,1.0,3,0
+74941094-9737-40F0-BF3C-0C2380B08040,0282109F-581C-4B8E-A99D-135CF0077C2E,0.9066666666666667,1.0,5,0
+F0F34E2F-49CC-4F06-8CC4-691CF3150244,02A06F96-AAD4-4EE2-B20B-CD1A4ED33D46,1.0,1.0,7,0
+6EB222E3-EB8F-4E20-BCE0-2C12F926ABB1,033FD0FA-C523-42B5-976A-751E830F7021,1.0,0.9333333333333333,8,0
+EE22ED8E-9544-4C77-A689-75895376E3EB,0350987D-D6B3-4519-A7C0-96C5B45111D6,1.0,1.0,3,0
+47DB90F0-6A7B-421F-9B18-CAB1CFA45E71,03522784-6097-4A7A-A54E-C6AA8E22BF20,1.0,1.0,2,0
+7E20FBBE-9B4B-4FAB-9433-CB77D9E6B022,038F08DA-12C8-4AF2-B5DD-43BB2A58DAA1,0.9666666666666667,0.9611111111111111,6,0
+A859D9BC-6106-43A2-8A47-B12D9D2C49C8,039AE50E-84E7-49A7-B720-48D2C765C5D5,1.0,1.0,1,0
+E19E5381-C68D-4E03-A688-597DF13311CE,03B89FD5-872A-4504-9758-F5AA1607BA01,1.0,0.9444444444444444,5,0
+671DE512-479B-4EEB-85B4-93A848E6BDD7,03DD4EB7-0FA7-4AA4-A510-79448E316A43,1.0,1.0,5,0
+81E992C0-3796-4BE7-B02E-9CAD0289C6EC,03FFD04A-DC09-47EC-84EF-A0DD3E9C0528,1.0,0.9925925925925926,9,0
diff --git a/hlink/tests/input_data/prepped_df_a_agg.csv b/hlink/tests/input_data/prepped_df_a_agg.csv
new file mode 100644
index 0000000..53a72ad
--- /dev/null
+++ b/hlink/tests/input_data/prepped_df_a_agg.csv
@@ -0,0 +1,25 @@
+histid,bpl,namelast_clean,namefrst_unstd,sex
+0202928A-AC3E-48BB-8568-3372067F35C7,3100,cridlebaugh,gerald,1
+1E5D9C44-3D8E-40F8-A843-8E7619CF7B94,3600,symonds,horace,1
+095AD921-9B08-468E-817A-44879FBCADDE,60094,abrahams,isiah,1
+6F6D3D9A-A2C4-471C-A19A-5EFE90AAA5C7,4700,eilbatt,reginald,1
+EAD03D68-F21D-4A74-8C16-F9123F5288D7,2100,knopke,andrew,1
+AF3C7686-98EF-46F5-B5DF-DE8CC50A93DC,15010,caldwell,daisy,2
+8A50FA06-BAF8-4EC5-9726-2EB3551CD6D1,1700,sonnenschein,max,1
+F2798AB4-3217-4D0A-A6A0-6B390A3C4B7A,5500,gibson,dwight,1
+D30C40B9-2E7C-4933-84CE-CEAAB37E3209,5600,hegewald,karl,1
+CCBA170F-93D0-42C3-A57B-CCABBF2772FB,3800,king,virgel,1
+4F29F4B1-F953-4FC8-A7FB-42F54FB51E73,4700,looney,sadie,2
+CC7B3BF9-AEE5-4ECB-9F5D-9F910346B9CD,1700,rydstrom,hubert,1
+2CC7B61B-6821-4B2B-A283-8FE8D557D6F6,3600,mugrdickian,misak,1
+195EA695-D047-4045-8757-E7A22F12E148,3900,brightman,austin,1
+74941094-9737-40F0-BF3C-0C2380B08040,5400,harman,eston,1
+F0F34E2F-49CC-4F06-8CC4-691CF3150244,4000,oglesby,stephen,1
+6EB222E3-EB8F-4E20-BCE0-2C12F926ABB1,5600,kassik,james,1
+EE22ED8E-9544-4C77-A689-75895376E3EB,1700,wood,dudley,1
+47DB90F0-6A7B-421F-9B18-CAB1CFA45E71,4200,foulkrod,s,1
+7E20FBBE-9B4B-4FAB-9433-CB77D9E6B022,100,huges,keneth,1
+A859D9BC-6106-43A2-8A47-B12D9D2C49C8,5000,caldwell,nathan,1
+E19E5381-C68D-4E03-A688-597DF13311CE,1200,platta,norman,1
+671DE512-479B-4EEB-85B4-93A848E6BDD7,1300,lipscomb,roy,1
+81E992C0-3796-4BE7-B02E-9CAD0289C6EC,200,woodburne,walter,1
diff --git a/hlink/tests/input_data/prepped_df_b_agg.csv b/hlink/tests/input_data/prepped_df_b_agg.csv
new file mode 100644
index 0000000..245860a
--- /dev/null
+++ b/hlink/tests/input_data/prepped_df_b_agg.csv
@@ -0,0 +1,31 @@
+histid,bpl,namelast_clean,namefrst_unstd,sex
+001B8A74-3795-4997-BC5B-2A07257668F9,3100,cridlebaugh,gerald,1
+002B8A74-3795-4997-BC5B-2A07257668F9,3100,cridlebaugh,gerald,1
+003B8A74-3795-4997-BC5B-2A07257668F9,3100,cridlebaugh,gerald,1
+00327A22-FF1E-400A-9A8A-1752A60BE7CF,3600,symonds,horace,1
+00427A22-FF1E-400A-9A8A-1752A60BE7CF,3600,symonds,horace,1
+01620FE0-E907-47F4-9368-5B14EBF69BE4,60094,abrahams,isniah,1
+02620FE0-E907-47F4-9368-5B14EBF69BE4,60094,abrahams,isniah,1
+03620FE0-E907-47F4-9368-5B14EBF69BE4,60094,abrahams,isniah,1
+00620FE0-E907-47F4-9368-5B14EBF69BE4,60094,abrahams,isniah,1
+00669345-C937-4405-A0F0-1FCA5204DF64,4700,eilbott,reginald,1
+007EDAE5-BDEF-4819-969E-7DAA8DC3FDD6,2100,knopke,andrew,1
+00849961-E52F-42F2-9B70-052606223052,15010,caldwell,daisy,2
+00C4291F-7064-4A81-8589-5854C367EEC4,1700,sonnenschein,max,1
+010F244F-94D0-4295-82DB-0E172724358A,5500,gebson,dwight,1
+01230024-F3C6-4D4A-86DF-F9EF29F77292,5600,hegewald,karl,1
+0141C5C9-32DC-469E-862C-A6CCD8A2EB2B,3800,king,virgil,1
+016EF43B-E70F-440E-882E-E447663F682F,4700,looney,sadye,2
+018C7B94-4387-4149-9B2D-CA7BB18AA559,1700,rydstrom,hubert,1
+019D26A0-0335-48B5-A6D6-1D499424BE84,3600,mugrdichian,misak,1
+0269E114-0EDD-4767-AE9F-B6557CD880EE,3900,brightman,anstin,1
+0282109F-581C-4B8E-A99D-135CF0077C2E,5400,harman,estan,1
+02A06F96-AAD4-4EE2-B20B-CD1A4ED33D46,4000,oglesby,stephen,1
+033FD0FA-C523-42B5-976A-751E830F7021,5600,kassek,james,1
+0350987D-D6B3-4519-A7C0-96C5B45111D6,1700,wood,dudley,1
+03522784-6097-4A7A-A54E-C6AA8E22BF20,4200,foulkrod,s,1
+038F08DA-12C8-4AF2-B5DD-43BB2A58DAA1,100,hughes,kenneth,1
+039AE50E-84E7-49A7-B720-48D2C765C5D5,5000,caldwell,nathan,1
+03B89FD5-872A-4504-9758-F5AA1607BA01,1200,platts,norman,1
+03DD4EB7-0FA7-4AA4-A510-79448E316A43,1300,lipscomb,roy,1
+03FFD04A-DC09-47EC-84EF-A0DD3E9C0528,200,woodburn,walter,1
diff --git a/hlink/tests/main_loop_test.py b/hlink/tests/main_loop_test.py
index f9ba9e5..8d16325 100755
--- a/hlink/tests/main_loop_test.py
+++ b/hlink/tests/main_loop_test.py
@@ -19,10 +19,6 @@ def test_do_get_steps(capsys, main, spark):
         main.do_get_steps("")
         output = capsys.readouterr().out
         for step in steps:
-            if str(step) not in output:
-                print(type(step))
-                print(step)
-                print(output)
             assert str(step) in output
 
 
diff --git a/hlink/tests/matching_potential_matches_test.py b/hlink/tests/matching_potential_matches_test.py
index c626df2..ac1e552 100755
--- a/hlink/tests/matching_potential_matches_test.py
+++ b/hlink/tests/matching_potential_matches_test.py
@@ -4,90 +4,6 @@
 #   https://github.com/ipums/hlink
 
 from jinja2 import Environment, PackageLoader
-import pytest
-
-
-@pytest.mark.skip(
-    reason="We still want to test that these aggregate features are being created correctly, but we need to refactor this test to account for the fact that aggregate features are now being created in a different step (step 4 doesn't exist anymore and the functionality was moved in the code)."
-)
-def test_step_4_aggregate_features(
-    spark, matching_conf, matching, potential_matches_agg_path
-):
-    """Test adding aggregate features (hits, hits2, exact_all_mult, etc.) to potential matches"""
-    matching_conf["id_column"] = "histid"
-    matching_conf["comparison_features"] = [
-        {
-            "alias": "namelast_jw",
-            "column_name": "namelast",
-            "comparison_type": "jaro_winkler",
-        },
-        {"alias": "exact"},
-        {"alias": "exact_all"},
-    ]
-    matching_conf["training"] = {
-        "independent_vars": [
-            "namelast_jw",
-            "exact",
-            "exact_all",
-            "hits",
-            "hits2",
-            "exact_mult",
-            "exact_all_mult",
-            "exact_all_mult2",
-        ]
-    }
-
-    potential_matches = matching.spark.read.csv(
-        potential_matches_agg_path, header=True, inferSchema=True
-    )
-    potential_matches.write.mode("overwrite").saveAsTable("potential_matches")
-    matching.step_4_aggregate_features()
-
-    pm_df = matching.spark.table("potential_matches").toPandas()
-
-    assert pm_df.shape == (30, 21)
-    assert (
-        pm_df.query(
-            "namelast_clean_a == 'cridlebaugh' and histid_b == '001B8A74-3795-4997-BC5B-2A07257668F9'"
-        )["exact"].iloc[0]
-        == 1
-    )
-    assert (
-        pm_df.query(
-            "namelast_clean_a == 'cridlebaugh' and histid_b == '001B8A74-3795-4997-BC5B-2A07257668F9'"
-        )["exact_all"].iloc[0]
-        == 1
-    )
-    assert (
-        pm_df.query(
-            "namelast_clean_a == 'cridlebaugh' and histid_b == '001B8A74-3795-4997-BC5B-2A07257668F9'"
-        )["hits"].iloc[0]
-        == 3
-    )
-    assert (
-        pm_df.query(
-            "namelast_clean_a == 'cridlebaugh' and histid_b == '001B8A74-3795-4997-BC5B-2A07257668F9'"
-        )["hits2"].iloc[0]
-        == 9
-    )
-    assert (
-        pm_df.query(
-            "namelast_clean_a == 'cridlebaugh' and histid_b == '001B8A74-3795-4997-BC5B-2A07257668F9'"
-        )["exact_mult"].iloc[0]
-        == 3
-    )
-    assert (
-        pm_df.query(
-            "namelast_clean_a == 'cridlebaugh' and histid_b == '001B8A74-3795-4997-BC5B-2A07257668F9'"
-        )["exact_all_mult"].iloc[0]
-        == 3
-    )
-    assert (
-        pm_df.query(
-            "namelast_clean_a == 'cridlebaugh' and histid_b == '001B8A74-3795-4997-BC5B-2A07257668F9'"
-        )["exact_all_mult2"].iloc[0]
-        == 9
-    )
 
 
 def test_potential_matches_sql_template() -> None:
diff --git a/hlink/tests/matching_scoring_test.py b/hlink/tests/matching_scoring_test.py
index a624618..613e1f6 100755
--- a/hlink/tests/matching_scoring_test.py
+++ b/hlink/tests/matching_scoring_test.py
@@ -3,84 +3,13 @@
 # in this project's top-level directory, and also on-line at:
 #   https://github.com/ipums/hlink
 
-import hlink.tests
 import pandas as pd
-import pytest
 import hlink.linking.core.threshold as threshold_core
 from hlink.linking.matching.link_step_score import LinkStepScore
 
 
-@pytest.mark.skip(
-    reason="We still want to test that whatever 'secondary_threshold' became is being applied correctly, but we need to refactor this test to account for the fact that this was totally renamed and is now being carried out in a different step (step 3 doesn't exist anymore)."
-)
-def test_step_3_uniq_and_secondary_threshold(spark, matching_conf, matching):
-    """Test a secondary threshold with uniqueness"""
-    matching_conf["comparison_features"] = [
-        {
-            "alias": "namefrst_jw",
-            "column_name": "namefrst",
-            "comparison_type": "jaro_winkler",
-        },
-        {
-            "alias": "namelast_jw",
-            "column_name": "namelast",
-            "comparison_type": "jaro_winkler",
-        },
-    ]
-
-    matching_conf["comparisons"] = {
-        "comp_a": {
-            "feature_name": "namefrst_jw",
-            "threshold": 0.8,
-            "comparison_type": "threshold",
-        },
-        "comp_b": {
-            "feature_name": "namelast_jw",
-            "comparison_type": "threshold",
-            "threshold": 0.8,
-        },
-        "operator": "AND",
-    }
-
-    matching_conf["secondary_threshold"] = {
-        "threshold_a": {
-            "feature_name": "namefrst_jw",
-            "comparison_type": "threshold",
-            "threshold": 0.9,
-        },
-        "threshold_b": {
-            "feature_name": "namelast_jw",
-            "comparison_type": "threshold",
-            "threshold": 0.9,
-        },
-        "unique_true": {"id_a": "id_a", "id_b": "id_b"},
-        "operator": "AND",
-        "secondary": True,
-    }
-
-    matching.step_0_explode()
-    matching.step_1_match()
-    hlink.linking.matching._step_2_score.__create_features(matching, matching_conf)
-
-    # Create pandas DFs of the step_2 potential matches table
-    potential_matches_df = spark.table("potential_matches_prepped").toPandas()
-
-    #    matching.step_3_secondary_threshold()
-    # unique_matches_df = spark.table("potential_matches").toPandas()
-    unique_high_matches_df = spark.table("potential_matches_prepped").toPandas()
-
-    assert len(potential_matches_df.id_a) == 5
-    # assert (len(unique_matches_df.id_a) == 1)
-    # assert (unique_matches_df.query("id_a == 10 and id_b == 10")["namelast_jw"].iloc[0] > 0.8)
-    # assert (unique_matches_df.query("id_a == 10 and id_b == 10")["namelast_jw"].iloc[0] < 0.9)
-    # assert (unique_matches_df.query("id_a == 10 and id_b == 10")["namefrst_jw"].iloc[0] > 0.8)
-    # assert (unique_matches_df.query("id_a == 10 and id_b == 10")["namefrst_jw"].iloc[0] > 0.9)
-    assert unique_high_matches_df.empty
-
-
-# TODO: is there a step 3 anymore?
-def test_step_3_skip_on_no_conf(spark, matching_conf, matching, capsys):
-    """Test matching step 3 doesn't run if no training config"""
+def test_step_2_skip_on_no_conf(spark, matching_conf, matching, capsys):
+    """Test matching step 2 doesn't run if no training config"""
 
     matching_conf["comparison_features"] = [
         {
@@ -102,11 +31,10 @@ def test_step_3_skip_on_no_conf(spark, matching_conf, matching, capsys):
     )
 
 
-# TODO: is there a step 3 any more?
-def test_step_3_alpha_beta_thresholds(
+def test_step_2_alpha_beta_thresholds(
     spark, matching, matching_conf, threshold_ratio_data_path_2
 ):
-    """Test matching step 3 with both probability and ratio thresholds"""
+    """Test matching step 2 with both probability and ratio thresholds"""
 
     matching.spark.read.csv(
         threshold_ratio_data_path_2, header=True, inferSchema=True
@@ -170,3 +98,75 @@ def test_step_3_alpha_beta_thresholds(
 
     assert tp.query("histid_a == '5a' and histid_b == '7b'")["prediction"].iloc[0] == 1
     assert tp.query("histid_a == '5a' and histid_b == '6b'")["prediction"].iloc[0] == 0
+
+
+def test_step_2_aggregate_features(
+    spark, matching_conf, matching, agg_features_datasources
+):
+    """Test that aggregate features (hits, hits2, exact_mult, etc.) are added to potential matches."""
+    matching_conf["id_column"] = "histid"
+    matching_conf["comparison_features"] = [
+        {
+            "alias": "namelast_jw",
+            "column_name": "namelast",
+            "comparison_type": "jaro_winkler",
+        },
+        {
+            "alias": "exact",
+            "column_names": ["namefrst_unstd", "namelast_clean"],
+            "comparison_type": "all_equals",
+        },
+        {
+            "alias": "exact_all",
+            "column_names": ["namefrst_unstd", "namelast_clean", "bpl"],
+            "comparison_type": "all_equals",
+        },
+    ]
+    matching_conf["training"] = {
+        "independent_vars": [
+            "namelast_jw",
+            "exact",
+            "exact_all",
+            "hits",
+            "hits2",
+            "exact_mult",
+            "exact_all_mult",
+            "exact_all_mult2",
+        ],
+        "chosen_model": {
+            "type": "probit",
+            "threshold": 0.5,
+        },
+        "dependent_var": "match",
+    }
+
+    potential_matches_path, prepped_df_a_path, prepped_df_b_path = (
+        agg_features_datasources
+    )
+    spark.read.csv(potential_matches_path, header=True, inferSchema=True).write.mode(
+        "overwrite"
+    ).saveAsTable("potential_matches")
+
+    spark.read.csv(prepped_df_a_path, header=True, inferSchema=True).write.mode(
+        "overwrite"
+    ).saveAsTable("prepped_df_a")
+    spark.read.csv(prepped_df_b_path, header=True, inferSchema=True).write.mode(
+        "overwrite"
+    ).saveAsTable("prepped_df_b")
+
+    link_step_score = LinkStepScore(matching)
+    link_step_score._create_features(matching_conf)
+
+    pm_prepped = spark.table("potential_matches_prepped").toPandas()
+
+    filtered = pm_prepped.query(
+        "histid_a == '0202928A-AC3E-48BB-8568-3372067F35C7' and histid_b == '001B8A74-3795-4997-BC5B-2A07257668F9'"
+    )
+
+    assert filtered["exact"].item()
+    assert filtered["exact_all"].item()
+    assert filtered["hits"].item() == 3
+    assert filtered["hits2"].item() == 9
+    assert filtered["exact_mult"].item()
+    assert filtered["exact_all_mult"].item() == 3
+    assert filtered["exact_all_mult2"].item() == 9
diff --git a/hlink/tests/model_exploration_test.py b/hlink/tests/model_exploration_test.py
index 8fb1e17..e0cf593 100644
--- a/hlink/tests/model_exploration_test.py
+++ b/hlink/tests/model_exploration_test.py
@@ -552,169 +552,3 @@ def test_step_2_split_by_id_a(
     assert splits[1][1].toPandas()["id_a"].unique().tolist() == ["30"]
 
     main.do_drop_all("")
-
-
-@pytest.mark.skip(
-    reason="Need to get tests working for new version of feature importances"
-)
-def test_step_3_get_feature_importances_random_forest(
-    spark,
-    training_conf,
-    training,
-    state_dist_path,
-    datasource_training_input,
-    potential_matches_path,
-    spark_test_tmp_dir_path,
-    model_exploration,
-):
-    """Test running the chosen model on potential matches dataset"""
-    td_path, pa_path, pb_path = datasource_training_input
-
-    training_conf["comparison_features"] = [
-        {
-            "alias": "regionf",
-            "column_name": "region",
-            "comparison_type": "fetch_a",
-            "categorical": True,
-        },
-        {
-            "alias": "namelast_jw",
-            "column_name": "namelast",
-            "comparison_type": "jaro_winkler",
-        },
-        {
-            "alias": "state_distance",
-            "column_name": "bpl",
-            "key_count": 1,
-            "comparison_type": "geo_distance",
-            "loc_a": "statecode1",
-            "loc_b": "statecode2",
-            "distance_col": "dist",
-            "table_name": "state_distances_lookup",
-            "distances_file": state_dist_path,
-        },
-    ]
-
-    training_conf["training"]["dataset"] = td_path
-    training_conf["training"]["dependent_var"] = "match"
-    training_conf["training"]["independent_vars"] = [
-        "namelast_jw",
-        "regionf",
-        "state_distance",
-    ]
-    training_conf["training"]["chosen_model"] = {
-        "type": "random_forest",
-        "maxDepth": 6,
-        "numTrees": 100,
-        "featureSubsetStrategy": "sqrt",
-    }
-
-    # training_conf["training"]["use_potential_matches_features"] = True
-    training_conf["training"]["score_with_model"] = True
-    training_conf["training"]["feature_importances"] = True
-    training_conf["spark_tmp_dir"] = spark_test_tmp_dir_path
-    training_conf["drop_data_from_scored_matches"] = True
-
-    training.spark.read.csv(pa_path, header=True, inferSchema=True).write.mode(
-        "overwrite"
-    ).saveAsTable("prepped_df_a")
-    training.spark.read.csv(pb_path, header=True, inferSchema=True).write.mode(
-        "overwrite"
-    ).saveAsTable("prepped_df_b")
-    training.spark.read.csv(
-        potential_matches_path, header=True, inferSchema=True
-    ).write.mode("overwrite").saveAsTable("potential_matches")
-
-    training.run_step(0)
-    training.run_step(1)
-    training.run_step(2)
-
-    model_exploration.run_step(3)
-
-    fi_df = training.spark.table("feature_importances").toPandas()
-
-    assert fi_df.shape == (6, 3)
-    assert 1 > fi_df.query("idx == 0")["score"].iloc()[0] >= 0
-    assert "regionf_onehotencoded_2" in list(fi_df["name"])
-    assert "regionf_onehotencoded_invalidValues" in list(fi_df["name"])
-
-
-@pytest.mark.skip(
-    reason="Need to get tests working for new version of feature importances"
-)
-def test_step_3_get_feature_importances_probit(
-    spark,
-    training_conf,
-    training,
-    state_dist_path,
-    datasource_training_input,
-    potential_matches_path,
-    spark_test_tmp_dir_path,
-    matching,
-):
-    """Test running the chosen model on potential matches dataset"""
-    td_path, pa_path, pb_path = datasource_training_input
-
-    training_conf["comparison_features"] = [
-        {
-            "alias": "regionf",
-            "column_name": "region",
-            "comparison_type": "fetch_a",
-            "categorical": True,
-        },
-        {
-            "alias": "namelast_jw",
-            "column_name": "namelast",
-            "comparison_type": "jaro_winkler",
-        },
-        {
-            "alias": "state_distance",
-            "key_count": 1,
-            "column_name": "bpl",
-            "comparison_type": "geo_distance",
-            "loc_a": "statecode1",
-            "loc_b": "statecode2",
-            "distance_col": "dist",
-            "table_name": "state_distances_lookup",
-            "distances_file": state_dist_path,
-        },
-    ]
-
-    training_conf["training"]["dataset"] = td_path
-    training_conf["training"]["dependent_var"] = "match"
-    training_conf["training"]["independent_vars"] = [
-        "namelast_jw",
-        "regionf",
-        "state_distance",
-    ]
-
-    training_conf["training"]["chosen_model"] = {"type": "probit", "threshold": 0.5}
-
-    # training_conf["training"]["use_potential_matches_features"] = True
-    training_conf["training"]["score_with_model"] = True
-    training_conf["training"]["feature_importances"] = True
-    training_conf["spark_tmp_dir"] = spark_test_tmp_dir_path
-    training_conf["drop_data_from_scored_matches"] = True
-
-    training.spark.read.csv(pa_path, header=True, inferSchema=True).write.mode(
-        "overwrite"
-    ).saveAsTable("prepped_df_a")
-    training.spark.read.csv(pb_path, header=True, inferSchema=True).write.mode(
-        "overwrite"
-    ).saveAsTable("prepped_df_b")
-    training.spark.read.csv(
-        potential_matches_path, header=True, inferSchema=True
-    ).write.mode("overwrite").saveAsTable("potential_matches")
-
-    training.run_step(0)
-    training.run_step(1)
-    training.run_step(2)
-    matching.run_step(2)
-    training.run_step(3)
-
-    fi_df = training.spark.table("feature_importances").toPandas()
-
-    assert fi_df.shape == (6, 3)
-    assert 25 > fi_df.query("idx == 0")["score"].iloc()[0] >= -5
-    assert "regionf_onehotencoded_2" in list(fi_df["name"])
-    assert "regionf_onehotencoded_invalidValues" in list(fi_df["name"])
diff --git a/hlink/tests/plugins/external_data_paths.py b/hlink/tests/plugins/external_data_paths.py
index a57947f..f04c525 100755
--- a/hlink/tests/plugins/external_data_paths.py
+++ b/hlink/tests/plugins/external_data_paths.py
@@ -148,15 +148,18 @@ def potential_matches_path_ids_only(spark):
 
 
 @pytest.fixture(scope="module")
-def potential_matches_agg_path(spark):
-    """Create a fixture with the path to the test potential_matches csv file"""
-
-    path = "input_data/potential_matches_agg.csv"
+def agg_features_datasources() -> tuple[str, str, str]:
+    """Return the paths to the potential_matches, prepped_df_a, and prepped_df_b csv data files."""
+    potential_matches_path = "input_data/potential_matches_agg.csv"
+    prepped_df_a_path = "input_data/prepped_df_a_agg.csv"
+    prepped_df_b_path = "input_data/prepped_df_b_agg.csv"
 
     package_path = os.path.dirname(hlink.tests.__file__)
-    full_path = os.path.join(package_path, path)
+    full_pm_path = os.path.join(package_path, potential_matches_path)
+    full_prepped_a_path = os.path.join(package_path, prepped_df_a_path)
+    full_prepped_b_path = os.path.join(package_path, prepped_df_b_path)
 
-    return full_path
+    return full_pm_path, full_prepped_a_path, full_prepped_b_path
 
 
 @pytest.fixture(scope="module")
diff --git a/hlink/tests/training_test.py b/hlink/tests/training_test.py
index 3730c02..0fbdb0a 100644
--- a/hlink/tests/training_test.py
+++ b/hlink/tests/training_test.py
@@ -349,6 +349,89 @@ def test_step_3_interacted_categorical_features(
     )
 
 
+def test_step_3_with_probit_model(
+    spark, training_conf, training, state_dist_path, datasource_training_input
+):
+    """Run training step 3 with a probit ML model."""
+    training_data_path, prepped_df_a_path, prepped_df_b_path = datasource_training_input
+    training_conf["comparison_features"] = [
+        {
+            "alias": "regionf",
+            "column_name": "region",
+            "comparison_type": "fetch_a",
+            "categorical": True,
+        },
+        {
+            "alias": "namelast_jw",
+            "column_name": "namelast",
+            "comparison_type": "jaro_winkler",
+        },
+        {
+            "alias": "state_distance",
+            "key_count": 1,
+            "column_name": "bpl",
+            "comparison_type": "geo_distance",
+            "loc_a": "statecode1",
+            "loc_b": "statecode2",
+            "distance_col": "dist",
+            "table_name": "state_distances_lookup",
+            "distances_file": state_dist_path,
+        },
+    ]
+    training_conf["training"]["dataset"] = training_data_path
+    training_conf["training"]["dependent_var"] = "match"
+    training_conf["training"]["independent_vars"] = [
+        "namelast_jw",
+        "regionf",
+        "state_distance",
+    ]
+
+    training_conf["training"]["chosen_model"] = {"type": "probit", "threshold": 0.5}
+    training_conf["training"]["score_with_model"] = True
+    training_conf["training"]["feature_importances"] = True
+
+    spark.read.csv(prepped_df_a_path, header=True, inferSchema=True).write.mode(
+        "overwrite"
+    ).saveAsTable("prepped_df_a")
+    spark.read.csv(prepped_df_b_path, header=True, inferSchema=True).write.mode(
+        "overwrite"
+    ).saveAsTable("prepped_df_b")
+
+    training.run_step(0)
+    training.run_step(1)
+    training.run_step(2)
+    training.run_step(3)
+
+    tfi = spark.table("training_feature_importances").toPandas()
+    assert (
+        8.9
+        <= tfi.query("feature_name == 'namelast_jw'")[
+            "coefficient_or_importance"
+        ].item()
+        <= 9.0
+    )
+    assert (
+        tfi.query("feature_name == 'regionf' and category == 0")[
+            "coefficient_or_importance"
+        ].item()
+        == 0
+    )
+    assert (
+        -7.6
+        <= tfi.query("feature_name == 'regionf' and category == 1")[
+            "coefficient_or_importance"
+        ].item()
+        <= -7.5
+    )
+    assert (
+        6.4
+        <= tfi.query("feature_name == 'regionf' and category == 99")[
+            "coefficient_or_importance"
+        ].item()
+        <= 6.5
+    )
+
+
 def test_step_3_requires_table(training_conf, training):
     training_conf["training"]["feature_importances"] = True
     with pytest.raises(RuntimeError, match="Missing input tables"):