Skip to content

Commit

Permalink
Fixed tests
Browse files: browse the repository at this point in the history
  • Loading branch information
MatsMoll committed May 26, 2024
1 parent c21c9dc commit e419ed1
Show file tree
Hide file tree
Showing 21 changed files with 45 additions and 160 deletions.
1 change: 0 additions & 1 deletion Dockerfile.test
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ WORKDIR /opt/app
RUN pip install poetry
RUN poetry config virtualenvs.create false
RUN pip install pip --upgrade
RUN apt-get update && apt-get install -y libpq-dev gcc

COPY ./pyproject.toml /opt/app/pyproject.toml

Expand Down
2 changes: 2 additions & 0 deletions aligned/data_source/batch_data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def __init__(self) -> None:
AzureBlobCsvDataSource,
AzureBlobDeltaDataSource,
AzureBlobParquetDataSource,
AzureBlobPartitionedParquetDataSource,
)
from aligned.schemas.feature_view import FeatureViewReferenceSource
from aligned.schemas.model import ModelSource
Expand All @@ -65,6 +66,7 @@ def __init__(self) -> None:
AzureBlobCsvDataSource,
AzureBlobDeltaDataSource,
AzureBlobParquetDataSource,
AzureBlobPartitionedParquetDataSource,
# Aligned Related Sources
JoinDataSource,
JoinAsofDataSource,
Expand Down
2 changes: 2 additions & 0 deletions aligned/retrival_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -1698,6 +1698,7 @@ async def to_lazy_polars(self) -> pl.LazyFrame:
raise error
self.logger(f'Results from {type(self.job).__name__} - {job_name}')
self.logger(df.columns)
self.logger(df)
self.logger(df.head(10).collect())
return df

Expand Down Expand Up @@ -1810,6 +1811,7 @@ async def compute_derived_features_polars(self, df: pl.LazyFrame) -> pl.LazyFram

if round_expressions:
df = df.with_columns(round_expressions)

return df

async def compute_derived_features_pandas(self, df: pd.DataFrame) -> pd.DataFrame:
Expand Down
21 changes: 4 additions & 17 deletions aligned/tests/test_train_test_validate_set.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import pytest

from pathlib import Path
from aligned.feature_store import ContractStore
from aligned.retrival_job import split
from aligned.schemas.folder import DatasetMetadata
Expand Down Expand Up @@ -58,18 +57,6 @@ async def test_train_test_validate_set(titanic_feature_store: ContractStore) ->
async def test_train_test_validate_set_new(titanic_feature_store: ContractStore) -> None:
from aligned.schemas.folder import JsonDatasetStore

unlink_paths = [
'test_data/titanic-sets.json',
'test_data/titanic-train.csv',
'test_data/titanic-test.csv',
'test_data/titanic-validate.csv',
]

for path_str in unlink_paths:
path = Path(path_str)
if path.exists():
path.unlink()

dataset_size = 100
train_fraction = 0.6
validation_fraction = 0.2
Expand All @@ -78,7 +65,7 @@ async def test_train_test_validate_set_new(titanic_feature_store: ContractStore)
test_size = int(round(dataset_size * (1 - train_fraction - validation_fraction)))
validate_size = int(round(dataset_size * validation_fraction))

dataset_store = FileSource.json_at('test_data/titanic-sets.json')
dataset_store = FileSource.json_at('test_data/temp/titanic-sets.json')
dataset = await (
titanic_feature_store.feature_view('titanic')
.all(limit=dataset_size)
Expand All @@ -88,9 +75,9 @@ async def test_train_test_validate_set_new(titanic_feature_store: ContractStore)
metadata=DatasetMetadata(
id='titanic_test',
),
train_source=FileSource.csv_at('test_data/titanic-train.csv'),
test_source=FileSource.csv_at('test_data/titanic-test.csv'),
validate_source=FileSource.csv_at('test_data/titanic-validate.csv'),
train_source=FileSource.csv_at('test_data/temp/titanic-train.csv'),
test_source=FileSource.csv_at('test_data/temp/titanic-test.csv'),
validate_source=FileSource.csv_at('test_data/temp/titanic-validate.csv'),
)
)

Expand Down
33 changes: 16 additions & 17 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import polars as pl
import pytest
import pytest_asyncio

from aligned import (
Bool,
Expand Down Expand Up @@ -210,8 +209,8 @@ class BreastDiagnoseFeatureView(FeatureView):
return BreastDiagnoseFeatureView()


@pytest_asyncio.fixture
async def breast_scan_without_timestamp_feature_store(
@pytest.fixture
def breast_scan_without_timestamp_feature_store(
breast_scan_feature_viewout_with_datetime: FeatureView,
) -> ContractStore:
store = ContractStore.empty()
Expand Down Expand Up @@ -344,17 +343,17 @@ class BreastDiagnoseFeatureView(FeatureView):
return BreastDiagnoseFeatureView()


@pytest_asyncio.fixture
async def breast_scan_with_timestamp_feature_store(
@pytest.fixture
def breast_scan_with_timestamp_feature_store(
breast_scan_feature_view_with_datetime: FeatureView,
) -> ContractStore:
store = ContractStore.empty()
store.add_feature_view(breast_scan_feature_view_with_datetime)
return store


@pytest_asyncio.fixture
async def breast_scan_with_timestamp_and_aggregation_feature_store(
@pytest.fixture
def breast_scan_with_timestamp_and_aggregation_feature_store(
breast_scan_feature_view_with_datetime_and_aggregation: FeatureView,
) -> ContractStore:
store = ContractStore.empty()
Expand Down Expand Up @@ -486,8 +485,8 @@ class TitanicPassenger(FeatureView):
return TitanicPassenger()


@pytest_asyncio.fixture
async def titanic_feature_store(
@pytest.fixture
def titanic_feature_store(
titanic_feature_view: FeatureView,
titanic_feature_view_parquet: FeatureView,
titanic_model: ModelContractWrapper,
Expand Down Expand Up @@ -537,17 +536,17 @@ class TitanicPassenger(FeatureView):
return TitanicPassenger()


@pytest_asyncio.fixture
async def alot_of_transforation_feature_store(
@pytest.fixture
def alot_of_transforation_feature_store(
alot_of_transforations_feature_view: FeatureView,
) -> ContractStore:
feature_store = ContractStore.empty()
feature_store.add_feature_view(alot_of_transforations_feature_view)
return feature_store


@pytest_asyncio.fixture
async def combined_view(
@pytest.fixture
def combined_view(
titanic_feature_view: FeatureView, breast_scan_feature_viewout_with_datetime: FeatureView
) -> CombinedFeatureView:
class SomeCombinedView(CombinedFeatureView):
Expand All @@ -565,8 +564,8 @@ class SomeCombinedView(CombinedFeatureView):
return SomeCombinedView()


@pytest_asyncio.fixture
async def combined_feature_store(
@pytest.fixture
def combined_feature_store(
titanic_feature_view: FeatureView,
breast_scan_feature_viewout_with_datetime: FeatureView,
combined_view: CombinedFeatureView,
Expand Down Expand Up @@ -647,8 +646,8 @@ class Titanic:
return Titanic


@pytest_asyncio.fixture
async def titanic_feature_store_scd(
@pytest.fixture
def titanic_feature_store_scd(
titanic_feature_view_scd: FeatureView,
titanic_feature_view_parquet: FeatureView,
titanic_model_scd: ModelContractWrapper,
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ sqlalchemy = "^2.0.19"
printf-log-formatter = "^0.3.0"
isort = "^5.12.0"
black = "^23.7.0"
psycopg2 = "^2.9.6"
# psycopg2 = "^2.9.6"

[build-system]
requires = ["poetry-core>=1.0.0"]
Expand Down
14 changes: 7 additions & 7 deletions test_data/credit_history.csv
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
bankruptcies,due_sum,credit_card_due,dob_ssn,event_timestamp,student_loan_due
0,30747,8419,19530219_5179,1587924064746575,22328
0,5459,2944,19520816_8737,1587924064746575,2515
0,33833,833,19860413_2537,1587924064746575,33000
0,54891,5936,19530219_5179,1588010464746575,48955
0,11076,1575,19520816_8737,1588010464746575,9501
0,41773,6263,19860413_2537,1588010464746575,35510
event_timestamp,student_loan_due,due_sum,credit_card_due,bankruptcies,dob_ssn
1587924064746575,22328,30747,8419,0,19530219_5179
1587924064746575,2515,5459,2944,0,19520816_8737
1587924064746575,33000,33833,833,0,19860413_2537
1588010464746575,48955,54891,5936,0,19530219_5179
1588010464746575,9501,11076,1575,0,19520816_8737
1588010464746575,35510,41773,6263,0,19860413_2537
Binary file modified test_data/credit_history.parquet
Binary file not shown.
Binary file modified test_data/credit_history_agg.parquet
Binary file not shown.
Binary file modified test_data/credit_history_mater.parquet
Binary file not shown.
6 changes: 3 additions & 3 deletions test_data/data/csv_iso.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
id,other,et,timestamp
1,foo,2024-05-01T19:45:30.713911+UTC,2024-05-01T19:45:30.714114+UTC
2,bar,2024-04-30T19:45:30.714109+UTC,2024-05-02T19:45:30.714115+UTC
3,baz,2024-04-29T19:45:30.714113+UTC,2024-05-03T19:45:30.714115+UTC
1,foo,2024-05-26T17:13:22.023609+UTC,2024-05-26T17:13:22.023613+UTC
2,bar,2024-05-25T17:13:22.023611+UTC,2024-05-27T17:13:22.023613+UTC
3,baz,2024-05-24T17:13:22.023612+UTC,2024-05-28T17:13:22.023614+UTC
6 changes: 3 additions & 3 deletions test_data/data/csv_unix.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
id,other,et,timestamp
1,foo,1714592730713911,1714592730714114
2,bar,1714506330714109,1714679130714115
3,baz,1714419930714113,1714765530714115
1,foo,1716743602023609,1716743602023613
2,bar,1716657202023611,1716830002023613
3,baz,1716570802023612,1716916402023614
Binary file modified test_data/data/parquet_iso.parquet
Binary file not shown.
Binary file modified test_data/data/parquet_unix.parquet
Binary file not shown.
14 changes: 7 additions & 7 deletions test_data/loan.csv
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
loan_amount,loan_status,event_timestamp,loan_id,personal_income
35000,True,1587924064746575,10000,59000
1000,False,1587924064746575,10001,9600
5500,True,1587924064746575,10002,9600
35000,True,1588010464746575,10000,65500
35000,True,1588010464746575,10001,54400
2500,True,1588010464746575,10002,9900
event_timestamp,loan_status,loan_amount,loan_id,personal_income
1587924064746575,True,35000,10000,59000
1587924064746575,False,1000,10001,9600
1587924064746575,True,5500,10002,9600
1588010464746575,True,35000,10000,65500
1588010464746575,True,35000,10001,54400
1588010464746575,True,2500,10002,9900
Binary file modified test_data/loan.parquet
Binary file not shown.
Binary file modified test_data/test_model.parquet
Binary file not shown.
1 change: 0 additions & 1 deletion test_data/titanic-sets.json

This file was deleted.

21 changes: 0 additions & 21 deletions test_data/titanic-test.csv

This file was deleted.

61 changes: 0 additions & 61 deletions test_data/titanic-train.csv

This file was deleted.

21 changes: 0 additions & 21 deletions test_data/titanic-validate.csv

This file was deleted.

0 comments on commit e419ed1

Please sign in to comment.