Skip to content

Commit

Permalink
Merge branch 'main' into python39
Browse files Browse the repository at this point in the history
  • Loading branch information
rwedge committed Jan 29, 2021
2 parents 7625c11 + 5450641 commit 622df2a
Show file tree
Hide file tree
Showing 11 changed files with 89 additions and 51 deletions.
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@
# documentation.
html_theme_options = {
"github_url": "https://github.com/alteryx/featuretools",
"twitter_url": "https://twitter.com/featuretools_py"
"twitter_url": "https://twitter.com/AlteryxOSS",
}

# Add any paths that contain custom themes here, relative to this directory.
Expand Down
5 changes: 4 additions & 1 deletion docs/source/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,18 @@ Release Notes
* Enhancements
* Add Python 3.9 support without Koalas testing (:pr:`1318`)
* Fixes
* Calculate direct features uses default value if parent missing (:pr:`1312`)
* Changes
* Documentation Changes
* Update Twitter link in documentation toolbar (:pr:`1322`)
* Testing Changes
* Unpin python-graphviz package on Windows (:pr:`1296`)
* Reorganize and clean up tests (:pr:`1294`, :pr:`1303`, :pr:`1306`)
* Trigger tests on pull request events (:pr:`1304`, :pr:`1315`)
* Remove unnecessary test skips on Windows (:pr:`1320`)

Thanks to the following people for contributing to this release:
:user:`gsheni`, :user:`jeff-hernandez`, :user:`rwedge`, :user:`thehomebrewnerd`
:user:`gsheni`, :user:`jeff-hernandez`, :user:`rwedge`, :user:`thehomebrewnerd`, :user:`seriallazer`

**v0.23.0 Dec 31, 2020**
* Fixes
Expand Down
2 changes: 1 addition & 1 deletion docs/source/templates/layout.html
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
<a href="https://github.com/alteryx/featuretools#readme" target="_blank">
<img class="footer-image-github" src="{{ pathto('_static/images/github.svg', 1) }}" alt="GitHub">
</a>
<a href="https://twitter.com/featuretools_py?lang=en" target="_blank">
<a href="https://twitter.com/AlteryxOSS" target="_blank">
<img class="footer-image-twitter" src="{{ pathto('_static/images/twitter.svg', 1) }}" alt="Twitter">
</a>
</div>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,12 @@ def _calculate_direct_features(self, features, child_df, df_trie, progress_callb
# new column names (in the child entity) for the merge
col_map = {relationship.parent_variable.id: merge_var}
index_as_feature = None

fillna_dict = {}
for f in features:
feature_defaults = {name: f.default_value
for name in f.get_feature_names() if not pd.isna(f.default_value)}
fillna_dict.update(feature_defaults)
if f.base_features[0].get_name() == relationship.parent_variable.id:
index_as_feature = f
base_names = f.base_features[0].get_feature_names()
Expand All @@ -565,7 +570,7 @@ def _calculate_direct_features(self, features, child_df, df_trie, progress_callb

progress_callback(len(features) / float(self.num_features))

return new_df
return new_df.fillna(fillna_dict)

def _calculate_agg_features(self, features, frame, df_trie, progress_callback):
test_feature = features[0]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -867,10 +867,12 @@ def test_empty_path_approximate_full(pd_es):
approximate=Timedelta(10, 's'),
cutoff_time=cutoff_time)
vals1 = feature_matrix[dfeat.get_name()].tolist()
assert np.isnan(vals1[0])
assert np.isnan(vals1[1])

assert (vals1[0] == 0)
assert (vals1[1] == 0)
assert feature_matrix[agg_feat.get_name()].tolist() == [5, 1]


# todo: do we need to test this situation?
# def test_empty_path_approximate_partial(pd_es):
# pd_es = copy.deepcopy(pd_es)
Expand Down Expand Up @@ -1828,6 +1830,21 @@ def test_calc_feature_matrix_with_cutoff_df_and_instance_ids(es):
assert (feature_matrix[property_feature.get_name()] == labels).values.all()


def test_calculate_feature_matrix_returns_default_values(default_value_es):
    """Check the values of a direct feature stacked on a Sum aggregation.

    A ``Sum`` of the transactions ``value`` column is defined on the
    ``sessions`` entity and then pulled back onto ``transactions`` as a
    direct feature. The resulting column, ordered by ``id``, must equal
    ``[2.0, 2.0, 1.0, 0.0]``.

    NOTE(review): the trailing ``0.0`` is presumably the row whose parent
    session is missing, filled with the feature default instead of NaN --
    confirm against the ``default_value_es`` fixture data.
    """
    entityset = default_value_es

    # Aggregation on the parent entity, then a direct feature on the child.
    child_total = ft.Feature(
        entityset["transactions"]["value"],
        parent_entity=entityset["sessions"],
        primitive=Sum,
    )
    direct_total = ft.Feature(child_total, entity=entityset["transactions"])

    matrix = to_pandas(
        ft.calculate_feature_matrix(features=[direct_total], entityset=entityset),
        index='id',
        sort_index=True,
    )

    observed = matrix[direct_total.get_name()]
    assert (observed == [2.0, 2.0, 1.0, 0.0]).values.all()


def test_entities_relationships(entities, relationships):
fm_1, features = ft.dfs(entities=entities,
relationships=relationships,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import sys
from datetime import datetime

import numpy as np
Expand Down Expand Up @@ -439,8 +438,6 @@ def dd_df(pd_df):
@pytest.fixture
def ks_df(pd_df):
ks = pytest.importorskip('databricks.koalas', reason="Koalas not installed, skipping")
if sys.platform.startswith('win'):
pytest.skip('skipping Koalas tests for Windows')
return ks.from_pandas(pd_df)


Expand Down Expand Up @@ -749,8 +746,6 @@ def dd_parent_child(pd_parent_child):
@pytest.fixture
def ks_parent_child(pd_parent_child):
ks = pytest.importorskip('databricks.koalas', reason="Koalas not installed, skipping")
if sys.platform.startswith('win'):
pytest.skip('skipping Koalas tests for Windows')
parent_df, child_df = pd_parent_child
parent_df = ks.from_pandas(parent_df)
child_df = ks.from_pandas(child_df)
Expand Down
69 changes: 58 additions & 11 deletions featuretools/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import copy
import sys

import composeml as cp
import dask.dataframe as dd
Expand Down Expand Up @@ -60,8 +59,6 @@ def dask_es(make_es):
@pytest.fixture
def ks_es(make_es):
ks = pytest.importorskip('databricks.koalas', reason="Koalas not installed, skipping")
if sys.platform.startswith('win'):
pytest.skip('skipping Koalas tests for Windows')
ks_es = copy.deepcopy(make_es)
for entity in ks_es.entities:
cleaned_df = pd_to_ks_clean(entity.df).reset_index(drop=True)
Expand Down Expand Up @@ -143,8 +140,6 @@ def dask_diamond_es(pd_diamond_es):
@pytest.fixture
def ks_diamond_es(pd_diamond_es):
ks = pytest.importorskip('databricks.koalas', reason="Koalas not installed, skipping")
if sys.platform.startswith('win'):
pytest.skip('skipping Koalas tests for Windows')
entities = {}
for entity in pd_diamond_es.entities:
entities[entity.id] = (ks.from_pandas(pd_to_ks_clean(entity.df)), entity.index, None, entity.variable_types)
Expand All @@ -157,6 +152,64 @@ def ks_diamond_es(pd_diamond_es):
return ft.EntitySet(id=pd_diamond_es.id, entities=entities, relationships=relationships)


@pytest.fixture(params=['pd_default_value_es', 'dask_default_value_es', 'ks_default_value_es'])
def default_value_es(request):
    """Return the fixture value named by the current parameter.

    The fixture is parametrised over three fixture names, so a test that
    requests it runs once per name; each run resolves that name through
    ``request.getfixturevalue``.
    """
    backend_fixture_name = request.param
    return request.getfixturevalue(backend_fixture_name)


@pytest.fixture
def pd_default_value_es():
    """Build a pandas EntitySet with ``transactions`` and ``sessions``.

    ``transactions`` has four rows referencing sessions "a", "a", "b" and
    "c"; ``sessions`` only contains "a" and "b", so the last transaction
    points at a session id that is absent from the parent dataframe. The
    two entities are linked ``sessions.id -> transactions.session_id``.
    """
    # (entity id, dataframe) pairs, registered in this order.
    frames = [
        ("transactions", pd.DataFrame({
            "id": [1, 2, 3, 4],
            "session_id": ["a", "a", "b", "c"],
            "value": [1, 1, 1, 1]
        })),
        ("sessions", pd.DataFrame({
            "id": ["a", "b"]
        })),
    ]

    entityset = ft.EntitySet()
    for entity_id, frame in frames:
        entityset.entity_from_dataframe(entity_id=entity_id,
                                        dataframe=frame,
                                        index="id")

    parent_variable = entityset["sessions"]["id"]
    child_variable = entityset["transactions"]["session_id"]
    entityset.add_relationship(ft.Relationship(parent_variable, child_variable))
    return entityset


@pytest.fixture
def dask_default_value_es(pd_default_value_es):
    """Rebuild ``pd_default_value_es`` on top of Dask dataframes.

    Every entity's dataframe is converted with ``dd.from_pandas`` using four
    partitions, and the relationships are re-declared by entity id and
    variable name. The new EntitySet keeps the id of the pandas one.
    """
    source_es = pd_default_value_es

    # Entity tuples are (dataframe, index, <None>, variable_types).
    entities = {
        entity.id: (dd.from_pandas(entity.df, npartitions=4),
                    entity.index,
                    None,
                    entity.variable_types)
        for entity in source_es.entities
    }

    relationships = []
    for rel in source_es.relationships:
        relationships.append((rel.parent_entity.id,
                              rel.parent_variable.name,
                              rel.child_entity.id,
                              rel.child_variable.name))

    return ft.EntitySet(id=source_es.id, entities=entities, relationships=relationships)


@pytest.fixture
def ks_default_value_es(pd_default_value_es):
    """Rebuild ``pd_default_value_es`` on top of Koalas dataframes.

    The test is skipped when ``databricks.koalas`` cannot be imported.
    Each entity's dataframe is passed through ``pd_to_ks_clean`` and then
    ``ks.from_pandas``; relationships are re-declared by entity id and
    variable name. The new EntitySet keeps the id of the pandas one.
    """
    ks = pytest.importorskip('databricks.koalas', reason="Koalas not installed, skipping")
    source_es = pd_default_value_es

    # Entity tuples are (dataframe, index, <None>, variable_types).
    entities = {
        entity.id: (ks.from_pandas(pd_to_ks_clean(entity.df)),
                    entity.index,
                    None,
                    entity.variable_types)
        for entity in source_es.entities
    }

    relationships = []
    for rel in source_es.relationships:
        relationships.append((rel.parent_entity.id,
                              rel.parent_variable.name,
                              rel.child_entity.id,
                              rel.child_variable.name))

    return ft.EntitySet(id=source_es.id, entities=entities, relationships=relationships)


@pytest.fixture(params=['pd_home_games_es', 'dask_home_games_es', 'ks_home_games_es'])
def home_games_es(request):
return request.getfixturevalue(request.param)
Expand Down Expand Up @@ -198,8 +251,6 @@ def dask_home_games_es(pd_home_games_es):
@pytest.fixture
def ks_home_games_es(pd_home_games_es):
ks = pytest.importorskip('databricks.koalas', reason="Koalas not installed, skipping")
if sys.platform.startswith('win'):
pytest.skip('skipping Koalas tests for Windows')
entities = {}
for entity in pd_home_games_es.entities:
entities[entity.id] = (ks.from_pandas(pd_to_ks_clean(entity.df)), entity.index, None, entity.variable_types)
Expand Down Expand Up @@ -235,8 +286,6 @@ def dd_mock_customer(pd_mock_customer):
@pytest.fixture
def ks_mock_customer(pd_mock_customer):
ks = pytest.importorskip('databricks.koalas', reason="Koalas not installed, skipping")
if sys.platform.startswith('win'):
pytest.skip('skipping Koalas tests for Windows')
ks_mock_customer = copy.deepcopy(pd_mock_customer)
for entity in ks_mock_customer.entities:
cleaned_df = pd_to_ks_clean(entity.df).reset_index(drop=True)
Expand Down Expand Up @@ -320,8 +369,6 @@ def dask_entities():
@pytest.fixture
def koalas_entities():
ks = pytest.importorskip('databricks.koalas', reason="Koalas not installed, skipping")
if sys.platform.startswith('win'):
pytest.skip('skipping Koalas tests for Windows')
cards_df = ks.DataFrame({"id": [1, 2, 3, 4, 5]})
transactions_df = ks.DataFrame({"id": [1, 2, 3, 4, 5, 6],
"card_id": [1, 2, 1, 3, 4, 5],
Expand Down
19 changes: 0 additions & 19 deletions featuretools/tests/entityset_tests/test_es.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import copy
import sys
from datetime import datetime

import dask.dataframe as dd
Expand Down Expand Up @@ -219,8 +218,6 @@ def dd_df(pd_df):
@pytest.fixture
def ks_df(pd_df):
ks = pytest.importorskip('databricks.koalas', reason="Koalas not installed, skipping")
if sys.platform.startswith('win'):
pytest.skip('skipping Koalas tests for Windows')
return ks.from_pandas(pd_df)


Expand Down Expand Up @@ -286,8 +283,6 @@ def dd_df2(pd_df2):
@pytest.fixture
def ks_df2(pd_df2):
ks = pytest.importorskip('databricks.koalas', reason="Koalas not installed, skipping")
if sys.platform.startswith('win'):
pytest.skip('skipping Koalas tests for Windows')
return ks.from_pandas(pd_df2)


Expand Down Expand Up @@ -322,8 +317,6 @@ def dd_df3(pd_df3):
@pytest.fixture
def ks_df3(pd_df3):
ks = pytest.importorskip('databricks.koalas', reason="Koalas not installed, skipping")
if sys.platform.startswith('win'):
pytest.skip('skipping Koalas tests for Windows')
return ks.from_pandas(pd_df3)


Expand Down Expand Up @@ -384,8 +377,6 @@ def dd_df4(pd_df4):
@pytest.fixture
def ks_df4(pd_df4):
ks = pytest.importorskip('databricks.koalas', reason="Koalas not installed, skipping")
if sys.platform.startswith('win'):
pytest.skip('skipping Koalas tests for Windows')
return ks.from_pandas(pd_to_ks_clean(pd_df4))


Expand Down Expand Up @@ -481,8 +472,6 @@ def dd_datetime1(pd_datetime1):
@pytest.fixture
def ks_datetime1(pd_datetime1):
ks = pytest.importorskip('databricks.koalas', reason="Koalas not installed, skipping")
if sys.platform.startswith('win'):
pytest.skip('skipping Koalas tests for Windows')
return ks.from_pandas(pd_datetime1)


Expand Down Expand Up @@ -527,8 +516,6 @@ def dd_datetime2(pd_datetime2):
@pytest.fixture
def ks_datetime2(pd_datetime2):
ks = pytest.importorskip('databricks.koalas', reason="Koalas not installed, skipping")
if sys.platform.startswith('win'):
pytest.skip('skipping Koalas tests for Windows')
return ks.from_pandas(pd_datetime2)


Expand Down Expand Up @@ -787,8 +774,6 @@ def dd_transactions_df(pd_transactions_df):
@pytest.fixture
def ks_transactions_df(pd_transactions_df):
ks = pytest.importorskip('databricks.koalas', reason="Koalas not installed, skipping")
if sys.platform.startswith('win'):
pytest.skip('skipping Koalas tests for Windows')
return ks.from_pandas(pd_transactions_df)


Expand Down Expand Up @@ -1227,8 +1212,6 @@ def dd_datetime3(pd_datetime3):
@pytest.fixture
def ks_datetime3(pd_datetime3):
ks = pytest.importorskip('databricks.koalas', reason="Koalas not installed, skipping")
if sys.platform.startswith('win'):
pytest.skip('skipping Koalas tests for Windows')
return ks.from_pandas(pd_datetime3)


Expand Down Expand Up @@ -1277,8 +1260,6 @@ def dd_index_df(pd_index_df):
@pytest.fixture
def ks_index_df(pd_index_df):
ks = pytest.importorskip('databricks.koalas', reason="Koalas not installed, skipping")
if sys.platform.startswith('win'):
pytest.skip('skipping Koalas tests for Windows')
return ks.from_pandas(pd_index_df)


Expand Down
4 changes: 0 additions & 4 deletions featuretools/tests/entityset_tests/test_es_metadata.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import sys

import pandas as pd
import pytest
from dask import dataframe as dd
Expand Down Expand Up @@ -150,8 +148,6 @@ def dd_employee_df(pd_employee_df):
@pytest.fixture
def ks_employee_df(pd_employee_df):
ks = pytest.importorskip('databricks.koalas', reason="Koalas not installed, skipping")
if sys.platform.startswith('win'):
pytest.skip('skipping Koalas tests for Windows')
return ks.from_pandas(pd_employee_df)


Expand Down
3 changes: 0 additions & 3 deletions featuretools/tests/entityset_tests/test_plotting.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os
import re
import sys

import graphviz
import pandas as pd
Expand Down Expand Up @@ -30,8 +29,6 @@ def dd_simple():
@pytest.fixture
def ks_simple():
ks = pytest.importorskip('databricks.koalas', reason="Koalas not installed, skipping")
if sys.platform.startswith('win'):
pytest.skip('skipping Koalas tests for Windows')
es = ft.EntitySet("test")
df = ks.DataFrame({'foo': [1]})
es.entity_from_dataframe('test', df)
Expand Down
3 changes: 0 additions & 3 deletions featuretools/tests/synthesis/test_deep_feature_synthesis.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import copy
import sys

import dask.dataframe as dd
import pandas as pd
Expand Down Expand Up @@ -85,8 +84,6 @@ def dask_transform_es(pd_transform_es):
@pytest.fixture
def koalas_transform_es(pd_transform_es):
ks = pytest.importorskip('databricks.koalas', reason="Koalas not installed, skipping")
if sys.platform.startswith('win'):
pytest.skip('skipping Koalas tests for Windows')
es = ft.EntitySet(id=pd_transform_es.id)
for entity in pd_transform_es.entities:
es.entity_from_dataframe(entity_id=entity.id,
Expand Down

0 comments on commit 622df2a

Please sign in to comment.