From 322ff21b2aa6b2622307eade4d5b6b86786b1032 Mon Sep 17 00:00:00 2001 From: James Gaboardi Date: Sat, 13 Jun 2020 18:48:10 -0400 Subject: [PATCH 1/4] adding tests and logic for id_codes.generate_geoid() --- nhgisxwalk/id_codes.py | 6 ++ nhgisxwalk/tests/test_nhgisxwalk.py | 134 +++++++++++++++++++++++++++- 2 files changed, 139 insertions(+), 1 deletion(-) diff --git a/nhgisxwalk/id_codes.py b/nhgisxwalk/id_codes.py index dc15e3c..0a2b681 100644 --- a/nhgisxwalk/id_codes.py +++ b/nhgisxwalk/id_codes.py @@ -113,6 +113,12 @@ def generate_geoid(in_id): if str(in_id) == "nan": out_id = in_id + elif str(in_id).replace(".", "").isdigit(): + if not str(in_id).replace(".", "", 1).isdigit(): + raise ValueError("'in_id' has too many decimals to be a float: %s." % in_id) + raise TypeError("Check the data type of '%s'." % in_id) + elif not in_id.startswith("G"): + raise ValueError("Check the NHGIS prefix of '%s'." % in_id) else: components = [in_id[1:3], in_id[4:7], in_id[8:12], in_id[12:]] out_id = "".join(components) diff --git a/nhgisxwalk/tests/test_nhgisxwalk.py b/nhgisxwalk/tests/test_nhgisxwalk.py index b8e01d1..b3244b7 100644 --- a/nhgisxwalk/tests/test_nhgisxwalk.py +++ b/nhgisxwalk/tests/test_nhgisxwalk.py @@ -170,7 +170,105 @@ def test_xwalk_state_bgp1990_trt2010(self,): numpy.testing.assert_equal(knw_str_vals, obs_str_vals) numpy.testing.assert_allclose(knw_num_vals, obs_num_vals, atol=6) - ###################################################################################################################### + def test_xwalk_extract_state_bgp1990_trt2010(self): + known_target_nan_xwalk = numpy.empty((0, 7)) + known_source_nan_xwalk = numpy.array( + [[numpy.nan, "G1000050990000", "10005990000", 0.0, 0.0, 0.0, 0.0]], + dtype=object, + ) + known_target_nan_base = numpy.empty((0, 6)) + known_source_nan_base_shape = (149, 6) + obs_xwalk = nhgisxwalk.GeoCrossWalk( + base_xwalk_blk1990_blk2010, + source_year=_90, + target_year=_10, + source_geo=bgp, + target_geo=trt, + base_source_table=tab_data_path_1990, + supp_source_table=supplement_data_path_90, + input_var=input_vars_1990, + weight_var=input_var_tags, + keep_base=True, + stfips=stfips, + ) + obs_target_nan_xwalk = obs_xwalk.extract_state("nan", endpoint="target").values + numpy.testing.assert_array_equal(known_target_nan_xwalk, obs_target_nan_xwalk) + obs_source_nan_xwalk = obs_xwalk.extract_state("nan", endpoint="source").values + numpy.testing.assert_array_equal( + known_source_nan_xwalk[0, 0], obs_source_nan_xwalk[0, 0] + ) + numpy.testing.assert_array_equal( + known_source_nan_xwalk[0, 1:3], obs_source_nan_xwalk[0, 1:3] + ) + obs_target_nan_base = obs_xwalk.extract_state( + "nan", endpoint="target", from_base=True + ).values + numpy.testing.assert_array_equal(known_target_nan_base, obs_target_nan_base) + obs_source_nan_base = obs_xwalk.extract_state( + "nan", endpoint="source", from_base=True + ).values + self.assertEqual(known_source_nan_base_shape, obs_source_nan_base.shape) + + def test_xwalk_extract_state_failure_bgp1990_trt2010(self): + with self.assertRaises(RuntimeError): + known_target_nan_base = numpy.empty((0, 6)) + obs_xwalk = nhgisxwalk.GeoCrossWalk( + base_xwalk_blk1990_blk2010, + source_year=_90, + target_year=_10, + source_geo=bgp, + target_geo=trt, + base_source_table=tab_data_path_1990, + supp_source_table=supplement_data_path_90, + input_var=input_vars_1990, + weight_var=input_var_tags, + keep_base=False, + stfips=stfips, + ) + obs_target_nan_base = obs_xwalk.extract_state( + "nan", endpoint="target", from_base=True + ).values + numpy.testing.assert_array_equal(known_target_nan_base, obs_target_nan_base) + with self.assertRaises(RuntimeError): + known_source_nan_base = numpy.empty((0, 6)) + obs_xwalk = nhgisxwalk.GeoCrossWalk( + base_xwalk_blk1990_blk2010, + source_year=_90, + target_year=_10, + source_geo=bgp, + target_geo=trt, + base_source_table=tab_data_path_1990, + supp_source_table=supplement_data_path_90, + input_var=input_vars_1990, + weight_var=input_var_tags, + keep_base=False, + stfips=stfips, + ) + obs_source_nan_base = obs_xwalk.extract_state( + "nan", endpoint="source", from_base=True + ).values + numpy.testing.assert_array_equal(known_source_nan_base, obs_source_nan_base) + + def test_xwalk_extract_unique_stfips_bgp1990_trt2010(self): + known_target_fips = set(["10"]) + known_source_fips = set(["10", "34", "nan"]) + obs_xwalk = nhgisxwalk.GeoCrossWalk( + base_xwalk_blk1990_blk2010, + source_year=_90, + target_year=_10, + source_geo=bgp, + target_geo=trt, + base_source_table=tab_data_path_1990, + supp_source_table=supplement_data_path_90, + input_var=input_vars_1990, + weight_var=input_var_tags, + keep_base=False, + stfips=stfips, + ) + obs_target_fips = obs_xwalk.extract_unique_stfips(endpoint="target") + self.assertEqual(known_target_fips, obs_target_fips) + obs_source_fips = obs_xwalk.extract_unique_stfips(endpoint="source") + self.assertEqual(known_source_fips, obs_source_fips) # 2000 bgp to 2010 trt through 2000 blk to 2010 blk def test_xwalk_full_bgp2000_trt2010(self): @@ -571,5 +669,39 @@ def test_valid_geo_shorthand(self): self.assertEqual(known_ns, observed_ns) +class Test_id_codes_functions(unittest.TestCase): + def setUp(self): + pass + + def test_generate_geoid_nan(self): + known_value = numpy.nan + observed_value = nhgisxwalk.id_codes.generate_geoid(numpy.nan) + self.assertEqual(numpy.isnan(known_value), numpy.isnan(observed_value)) + + def test_generate_geoid_digit_str(self): + with self.assertRaises(TypeError): + nhgisxwalk.id_codes.generate_geoid("1") + + def test_generate_geoid_digit_int(self): + with self.assertRaises(TypeError): + nhgisxwalk.id_codes.generate_geoid(1) + + def test_generate_geoid_digit_str(self): + with self.assertRaises(TypeError): + nhgisxwalk.id_codes.generate_geoid("1.1") + + def test_generate_geoid_digit_float(self): + with self.assertRaises(TypeError): + nhgisxwalk.id_codes.generate_geoid(1.1) + + def test_generate_geoid_bad_char_int(self): + with self.assertRaises(ValueError): + nhgisxwalk.id_codes.generate_geoid("X1") + + def test_generate_geoid_bad_char_float(self): + with self.assertRaises(ValueError): + nhgisxwalk.id_codes.generate_geoid("X1.1") + + if __name__ == "__main__": unittest.main() From eeb1ed263a24a0c75f9508f23080c6c213675aac Mon Sep 17 00:00:00 2001 From: James Gaboardi Date: Sat, 13 Jun 2020 19:46:42 -0400 Subject: [PATCH 2/4] more tests in id_codes.py --- nhgisxwalk/id_codes.py | 5 ++++- nhgisxwalk/tests/test_nhgisxwalk.py | 16 ++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/nhgisxwalk/id_codes.py b/nhgisxwalk/id_codes.py index 0a2b681..769eace 100644 --- a/nhgisxwalk/id_codes.py +++ b/nhgisxwalk/id_codes.py @@ -314,13 +314,16 @@ def trt_gj(year, _id): """ + if not _id.startswith("G"): + raise ValueError("Check the NHGIS prefix of '%s'." % _id) + if year == "2010": indexer = 14 # slice out tract ID tract_id = _id[:indexer] else: msg = "Census year %s is not currently supported." % year - raise RuntimeError(msg) + raise ValueError(msg) return tract_id diff --git a/nhgisxwalk/tests/test_nhgisxwalk.py b/nhgisxwalk/tests/test_nhgisxwalk.py index b3244b7..757eacb 100644 --- a/nhgisxwalk/tests/test_nhgisxwalk.py +++ b/nhgisxwalk/tests/test_nhgisxwalk.py @@ -678,7 +678,7 @@ def test_generate_geoid_nan(self): observed_value = nhgisxwalk.id_codes.generate_geoid(numpy.nan) self.assertEqual(numpy.isnan(known_value), numpy.isnan(observed_value)) - def test_generate_geoid_digit_str(self): + def test_generate_geoid_digit_int_str(self): with self.assertRaises(TypeError): nhgisxwalk.id_codes.generate_geoid("1") @@ -686,7 +686,7 @@ def test_generate_geoid_digit_int(self): with self.assertRaises(TypeError): nhgisxwalk.id_codes.generate_geoid(1) - def test_generate_geoid_digit_str(self): + def test_generate_geoid_digit_float_str(self): with self.assertRaises(TypeError): nhgisxwalk.id_codes.generate_geoid("1.1") @@ -694,6 +694,10 @@ def test_generate_geoid_digit_float(self): with self.assertRaises(TypeError): nhgisxwalk.id_codes.generate_geoid(1.1) + def test_generate_geoid_digit_unkown_float_str(self): + with self.assertRaises(ValueError): + nhgisxwalk.id_codes.generate_geoid("1.1.1") + def test_generate_geoid_bad_char_int(self): with self.assertRaises(ValueError): nhgisxwalk.id_codes.generate_geoid("X1") @@ -702,6 +706,14 @@ def test_generate_geoid_bad_char_float(self): with self.assertRaises(ValueError): nhgisxwalk.id_codes.generate_geoid("X1.1") + def test_trt_gj_no_G(self): + with self.assertRaises(ValueError): + nhgisxwalk.id_codes.trt_gj("2010", "X1.1") + + def test_trt_gj_no_G(self): + with self.assertRaises(ValueError): + nhgisxwalk.id_codes.trt_gj("0000", "G123456789123456789") + if __name__ == "__main__": unittest.main() From d3ee8c1ceef61cd262876eff1f70a7f58cb2ffce Mon Sep 17 00:00:00 2001 From: James Gaboardi Date: Sat, 13 Jun 2020 20:05:57 -0400 Subject: [PATCH 3/4] more tests in id_codes.py [2] --- nhgisxwalk/tests/test_nhgisxwalk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nhgisxwalk/tests/test_nhgisxwalk.py b/nhgisxwalk/tests/test_nhgisxwalk.py index 757eacb..5c85785 100644 --- a/nhgisxwalk/tests/test_nhgisxwalk.py +++ b/nhgisxwalk/tests/test_nhgisxwalk.py @@ -710,7 +710,7 @@ def test_trt_gj_no_G(self): with self.assertRaises(ValueError): nhgisxwalk.id_codes.trt_gj("2010", "X1.1") - def test_trt_gj_no_G(self): + def test_trt_gj_bad_year(self): with self.assertRaises(ValueError): nhgisxwalk.id_codes.trt_gj("0000", "G123456789123456789") From 9fb1bf76238d25010e1396facaae1e7f39f08f6b Mon Sep 17 00:00:00 2001 From: James Gaboardi Date: Sat, 13 Jun 2020 20:30:16 -0400 Subject: [PATCH 4/4] add test for no supp file 1990 --- nhgisxwalk/tests/test_nhgisxwalk.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/nhgisxwalk/tests/test_nhgisxwalk.py b/nhgisxwalk/tests/test_nhgisxwalk.py index 5c85785..c8f4e9e 100644 --- a/nhgisxwalk/tests/test_nhgisxwalk.py +++ b/nhgisxwalk/tests/test_nhgisxwalk.py @@ -170,6 +170,23 @@ def test_xwalk_state_bgp1990_trt2010(self,): numpy.testing.assert_equal(knw_str_vals, obs_str_vals) numpy.testing.assert_allclose(knw_num_vals, obs_num_vals, atol=6) + def test_xwalk_bgp1990_trt2010_no_supp_error(self): + with self.assertRaises(RuntimeError): + obs_xwalk = nhgisxwalk.GeoCrossWalk( + base_xwalk_blk1990_blk2010, + source_year=_90, + target_year=_10, + source_geo=bgp, + target_geo=trt, + base_source_table=tab_data_path_1990, + supp_source_table=None, + input_var=input_vars_1990, + weight_var=input_var_tags, + stfips=stfips, + vectorized=False, + keep_base=False, + ) + def test_xwalk_extract_state_bgp1990_trt2010(self): known_target_nan_xwalk = numpy.empty((0, 7)) known_source_nan_xwalk = numpy.array(