Skip to content

Commit

Permalink
Merge pull request #85 from dmitrypolo/feature/change_test_layout
Browse files Browse the repository at this point in the history
Feature/change test layout
fixes #80
  • Loading branch information
eltonlaw authored Aug 1, 2019
2 parents fd2a757 + e8f0d92 commit 45412a4
Show file tree
Hide file tree
Showing 16 changed files with 250 additions and 272 deletions.
3 changes: 1 addition & 2 deletions impyute/dataset/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
""" Real-world/mock datasets and missingness corruptors to experiment with. """
from .base import randu
from .base import randn
from .base import test_data
from .base import mnist

__all__ = ["randu", "randn", "test_data", "mnist"]
__all__ = ["randu", "randn", "mnist"]
14 changes: 0 additions & 14 deletions impyute/dataset/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,20 +66,6 @@ def randn(theta=(0, 1), shape=(5, 5), missingness="mcar", thr=0.2, dtype="float"
return raw_data


def test_data(mask=np.zeros((3, 3), dtype=bool)):
    """ Returns a dataset to use with tests (INTERNAL USE - FOR UNIT TESTING)

    The dataset has the same shape as `mask` and holds the values
    0, 1, 2, ... in row-major order as floats, with NaN written wherever
    `mask` is True.

    mask: True/False array, same size as dataset
        Use True where missing values should occur and False everywhere else.
        Defaults to a 3x3 all-False mask, i.e. no missing values.
    """
    shape = np.shape(mask)
    # np.prod rather than the np.product alias: the alias was deprecated in
    # NumPy 1.25 and removed in NumPy 2.0.
    data = np.reshape(np.arange(np.prod(shape)), shape).astype("float")
    data[mask] = np.nan
    return data


def mnist(missingness="mcar", thr=0.2):
""" Loads corrupted MNIST
Expand Down
19 changes: 19 additions & 0 deletions impyute/util/testing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import numpy as np


def return_na_check(data):
    """Assert that an imputation result is a NaN-free numpy array.

    Test helper: fails with an ``AssertionError`` when ``data`` is not a
    ``numpy.ndarray`` or when any of its entries is NaN.

    Parameters
    ----------
    data: numpy.ndarray
        Imputed data to validate.

    Returns
    -------
    None
    """
    # Type check first, so the NaN scan below only ever sees an ndarray.
    assert isinstance(data, np.ndarray)
    assert not np.any(np.isnan(data))
3 changes: 3 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[pytest]
filterwarnings =
ignore::RuntimeWarning
50 changes: 50 additions & 0 deletions test/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import os
import shutil

import pytest
import numpy as np


@pytest.fixture(scope='function')
def test_data():
    """Fixture returning a factory for small datasets with one missing value.

    The returned callable ``prepare_data(shape, pos1, pos2)`` builds a float
    array of the given shape filled with 0, 1, 2, ... in row-major order and
    writes NaN at index ``[pos1, pos2]`` (defaults: 3x3 array, NaN at [0, 0]).
    """
    def prepare_data(shape=(3, 3), pos1=0, pos2=0):
        # np.prod rather than the np.product alias: the alias was deprecated
        # in NumPy 1.25 and removed in NumPy 2.0.
        data = np.reshape(np.arange(np.prod(shape)), shape).astype("float")
        data[pos1, pos2] = np.nan
        return data
    return prepare_data


@pytest.fixture(scope='session')
def buck_test_data():
    """Session-wide 6x8 float dataset whose [0, 0] entry is NaN."""
    rows = [
        [1, 2, 3, 4, 5, 6, 7, 8],
        [1, 4, 6, 8, 10, 12, 14, 16],
        [0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4],
        [0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4],
        [3, 6, 9, 12, 15, 18, 21, 24],
        [4, 8, 9, 16, 20, 24, 28, 32],
    ]
    # The 0.5-step rows make asarray produce a float array, which is what
    # allows NaN to be stored below.
    data = np.asarray(rows)
    data[0, 0] = np.nan
    return data


@pytest.fixture(scope='session')
def knn_test_data():
    """Session-wide 100x100 array of standard-normal draws with NaN holes.

    NaN is written at ``int(n * 0.3 * n)`` randomly drawn positions; the
    positions are drawn independently, so repeats are possible.
    """
    n = 100
    data = np.random.normal(size=(n, n))
    n_draws = int(n * 0.3 * n)
    for _ in range(n_draws):
        # Row index is drawn before the column index.
        row = np.random.randint(n)
        col = np.random.randint(n)
        data[row, col] = np.nan
    return data


@pytest.fixture(scope='function')
def mw_data():
    """Fresh 5x5 float array holding 0..24 in row-major order, no NaN."""
    return np.arange(25, dtype=float).reshape(5, 5)


@pytest.fixture(scope='session')
def results_path(tmpdir_factory):
    """Yield the path of a ``results.txt`` file inside a temporary 'logs' dir.

    The directory is created through ``tmpdir_factory``; only the path is
    yielded, the file itself is not created here. After the session the
    directory is removed if it still exists.
    """
    log_dir = tmpdir_factory.mktemp('logs')
    resolved_dir = os.path.realpath(str(log_dir))
    yield os.path.join(resolved_dir, 'results.txt')
    # Teardown: runs once the fixture goes out of scope.
    if log_dir.exists():
        shutil.rmtree(str(log_dir))
40 changes: 15 additions & 25 deletions test/deletion/test_complete_case.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,25 @@
"""test_complete_case.py"""
import numpy as np
from impyute.dataset import test_data
from impyute.deletion import complete_case
from impyute.util.testing import return_na_check

mask = np.zeros((5, 5), dtype=bool)
data_c = test_data(mask)
mask[0][0] = True
data_m = test_data(mask)
SHAPE = (5, 5)

def test_return_type():
""" Check return type, should return an np.ndarray"""
imputed = complete_case(data_m)
assert isinstance(imputed, np.ndarray)

def test_impute_no_missing_values():
""" After imputation, no change should occur"""
imputed = complete_case(data_m)
assert not np.isnan(imputed).any()
def test_complete_case_(test_data):
data = test_data(SHAPE)
imputed = complete_case(data)
return_na_check(imputed)

def test_impute_missing_values():
""" After imputation, no NaN's should exist"""
imputed = complete_case(data_m)

def test_impute_missing_values(test_data):
data = test_data(SHAPE)
imputed = complete_case(data)
assert np.shape(imputed) == (4, 5)

def test_imputed_values():
""" Assert values are as expected"""
imputed = complete_case(data_m)
expected = np.array([
[5., 6., 7., 8., 9.],
[10., 11., 12., 13., 14.],
[15., 16., 17., 18., 19.],
[20., 21., 22., 23., 24.]
])

def test_imputed_values(test_data):
data = test_data(SHAPE)
imputed = complete_case(data)
expected = np.arange(5, 25, dtype=float).reshape(4, 5)
assert np.equal(imputed, expected).all()
24 changes: 4 additions & 20 deletions test/imputation/cs/test_buck_iterative.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,8 @@
"""test_buck_iterative.py"""
import numpy as np
import impyute as impy
from impyute.util.testing import return_na_check

data = np.asarray([[1, 2, 3, 4, 5, 6, 7, 8],
[1, 4, 6, 8, 10, 12, 14, 16],
[0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4],
[0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4],
[3, 6, 9, 12, 15, 18, 21, 24],
[4, 8, 9, 16, 20, 24, 28, 32]])
mask = np.zeros((6, 8), dtype=bool)
data_c = data[mask]
data[0][0] = np.nan
data_m = data

def test_return_type():
""" Check return type, should return an np.ndarray"""
imputed = impy.buck_iterative(data_m)
assert isinstance(imputed, np.ndarray)

def test_impute_missing_values():
""" After imputation, no NaN's should exist"""
imputed = impy.buck_iterative(data_m)
assert not np.isnan(imputed).any()
def test_buck_iter(buck_test_data):
imputed = impy.buck_iterative(buck_test_data)
return_na_check(imputed)
43 changes: 14 additions & 29 deletions test/imputation/cs/test_central_tendency.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,23 @@
"""test_averagings.py"""
import numpy as np
import impyute as impy
from impyute.util.testing import return_na_check

mask = np.zeros((5, 5), dtype=bool)
data_c = impy.dataset.test_data(mask=mask)
mask[0][0] = True
data_m = impy.dataset.test_data(mask=mask)
SHAPE = (5, 5)

def test_mean_return_type():
""" Check return type, should return an np.ndarray"""
imputed = impy.mode(data_m)
assert isinstance(imputed, np.ndarray)

def test_mode_return_type():
""" Check return type, should return an np.ndarray"""
imputed = impy.mode(data_m)
assert isinstance(imputed, np.ndarray)
def test_mean(test_data):
data = test_data(SHAPE)
imputed = impy.mean(data)
return_na_check(imputed)

def test_median_return_type():
""" Check return type, should return an np.ndarray"""
imputed = impy.mode(data_m)
assert isinstance(imputed, np.ndarray)

def test_mean_impute_missing_values():
""" After imputation, no Nan's should exist"""
imputed = impy.mean(data_m)
assert not np.isnan(imputed).any()
def test_mode(test_data):
data = test_data(SHAPE)
imputed = impy.mode(data)
return_na_check(imputed)

def test_mode_impute_missing_values():
""" After imputation, no NaN's should exist"""
imputed = impy.mode(data_m)
assert not np.isnan(imputed).any()

def test_median_impute_missing_values():
""" After imputation, no NaN's should exist"""
imputed = impy.median(data_m)
assert not np.isnan(imputed).any()
def test_median(test_data):
data = test_data(SHAPE)
imputed = impy.median(data)
return_na_check(imputed)
19 changes: 6 additions & 13 deletions test/imputation/cs/test_em.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,11 @@
"""test_em.py"""
import numpy as np
import impyute as impy
from impyute.util.testing import return_na_check

mask = np.zeros((5, 5), dtype=bool)
data_c = impy.dataset.test_data(mask=mask)
mask[0][0] = True
data_m = impy.dataset.test_data(mask=mask)
SHAPE = (5, 5)

def test_return_type():
""" Check return type, should return an np.ndarray"""
imputed = impy.em(data_m)
assert isinstance(imputed, np.ndarray)

def test_impute_missing_values():
""" After imputation, no NaN's should exist"""
imputed = impy.em(data_m)
assert not np.isnan(imputed).any()
def test_em_(test_data):
data = test_data(SHAPE)
imputed = impy.em(data)
return_na_check(imputed)
38 changes: 14 additions & 24 deletions test/imputation/cs/test_fast_knn.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,37 +2,27 @@
import functools
import numpy as np
import impyute as impy
from impyute.util.testing import return_na_check
# pylint:disable=invalid-name

n = 100
data_c = np.random.normal(size=n*n).reshape((n, n))
data_m1 = data_c.copy()
for _ in range(int(n*0.3*n)):
data_m1[np.random.randint(n)][np.random.randint(n)] = np.nan
SHAPE = (5, 5)

def test_return_type():
""" Check return type, should return an np.ndarray"""
imputed = impy.fast_knn(data_m1)
assert isinstance(imputed, np.ndarray)

def test_impute_missing_values():
""" After imputation, no NaN's should exist"""
imputed = impy.fast_knn(data_m1)
assert not np.isnan(imputed).any()
def test_return_type(knn_test_data):
imputed = impy.fast_knn(knn_test_data)
return_na_check(imputed)

data_m2 = np.array([[0., 1., np.nan, 3., 4.],
[5., 6., 7., 8., 9.],
[10., 11., 12., 13., 14.],
[15., 16., 17., 18., 19.],
[20., 21., 22., 23., 24.]])

def test_impute_value():
def test_impute_value(test_data):
"fast_knn using standard idw"
imputed = impy.fast_knn(data_m2, k=2)
assert np.isclose(imputed[0][2], 8.38888888888889)
data = test_data(SHAPE, 0, 2)
imputed = impy.fast_knn(data, k=2)
assert np.isclose(imputed[0, 2], 8.38888888888889)

def test_impute_value_custom_idw():

def test_impute_value_custom_idw(test_data):
"fast_knn using custom idw"
data = test_data(SHAPE, 0, 2)
idw = functools.partial(impy.util.inverse_distance_weighting.shepards, power=1)
imputed = impy.fast_knn(data_m2, k=2, idw=idw)
assert np.isclose(imputed[0][2], 8.913911092686593)
imputed = impy.fast_knn(data, k=2, idw=idw)
assert np.isclose(imputed[0, 2], 8.913911092686593)
19 changes: 6 additions & 13 deletions test/imputation/cs/test_random.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,11 @@
"""test_random_imputation.py"""
import numpy as np
import impyute as impy
from impyute.util.testing import return_na_check

mask = np.zeros((3, 3), dtype=bool)
data_c = impy.dataset.test_data(mask=mask)
mask[0][0] = True
data_m = impy.dataset.test_data(mask=mask)
SHAPE = (3, 3)

def test_return_type():
"""Check return type, should return an np.ndarray"""
imputed = impy.random(data_m)
assert isinstance(imputed, np.ndarray)

def test_impute_missing_values():
"""After imputation, no NaN's should exist"""
imputed = impy.random(data_m)
assert not np.isnan(imputed).any()
def test_random_(test_data):
data = test_data(SHAPE)
imputed = impy.random(data)
return_na_check(imputed)
Loading

0 comments on commit 45412a4

Please sign in to comment.