-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathtest_utilities.py
39 lines (31 loc) · 1.44 KB
/
test_utilities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# This file is part of ipumspy.
# For copyright and licensing information, see the NOTICE and LICENSE files
# in this project's top-level directory, and also on-line at:
# https://github.com/ipums/ipumspy
from pathlib import Path
import numpy as np
import pandas as pd
import pytest
import vcr
from ipumspy import readers
from ipumspy.utilities import tabulate
def test_tabulate(fixtures_path: Path) -> None:
    """
    Confirm that tabulate functions as expected.

    Two variables from the ``cps_00006`` fixture extract are tabulated:

    * ``YEAR`` -- the result is expected to expose only the ``counts`` and
      ``pct`` columns.
    * ``MONTH`` -- the result is expected to additionally expose ``val`` and
      ``lab`` columns.

    Args:
        fixtures_path: Directory containing the ``cps_00006`` DDI (``.xml``)
            and fixed-width data (``.dat.gz``) fixture files.
    """
    ddi = readers.read_ipums_ddi(fixtures_path / "cps_00006.xml")
    data = readers.read_microdata(ddi, fixtures_path / "cps_00006.dat.gz")

    year_info = ddi.get_variable_info("YEAR")
    crosstab_df = tabulate(year_info, data)

    assert list(crosstab_df.columns) == ["counts", "pct"]
    # Compare element-wise. Reducing each side with ``.all()`` before the
    # comparison collapses both to a single boolean, so the assertion would
    # pass for any non-zero values regardless of what they are.
    np.testing.assert_array_equal(crosstab_df["counts"].to_numpy(), [4065, 3603])
    # The expected shares are rounded to six decimals, hence the explicit
    # absolute tolerance instead of exact equality.
    np.testing.assert_allclose(
        crosstab_df["pct"].to_numpy(), [0.530125, 0.469875], rtol=0, atol=1e-6
    )

    # Re-read the extract so the second case starts from freshly loaded inputs.
    ddi = readers.read_ipums_ddi(fixtures_path / "cps_00006.xml")
    data = readers.read_microdata(ddi, fixtures_path / "cps_00006.dat.gz")

    month_info = ddi.get_variable_info("MONTH")
    crosstab_df = tabulate(month_info, data)

    assert list(crosstab_df.columns) == ["val", "lab", "counts", "pct"]
    np.testing.assert_array_equal(crosstab_df["val"].to_numpy(), [3])
    assert list(crosstab_df["lab"]) == ["March"]
    np.testing.assert_array_equal(crosstab_df["counts"].to_numpy(), [7668])
    np.testing.assert_allclose(
        crosstab_df["pct"].to_numpy(), [1.0], rtol=0, atol=1e-6
    )