From 9e5b686fedaee77915f6f78ce5de093f8e217518 Mon Sep 17 00:00:00 2001 From: "Mats E. Mollestad" Date: Thu, 2 Nov 2023 09:07:20 +0100 Subject: [PATCH] fix: some event timestamp bugs --- aligned/compiler/feature_factory.py | 4 +- aligned/data_source/batch_data_source.py | 6 ++ aligned/feature_source.py | 4 +- aligned/feature_store.py | 58 ++++++----- aligned/local/job.py | 41 ++++++-- aligned/request/retrival_request.py | 27 ++++- aligned/retrival_job.py | 16 +-- aligned/sources/local.py | 11 ++- aligned/sources/tests/test_parquet.py | 66 ++++++++++++- aligned/sources/tests/test_psql.py | 44 ++++++++- aligned/tests/test_model_target.py | 2 +- conftest.py | 121 +++++++++++++++++++++++ test_data/feature-store.json | 2 +- 13 files changed, 346 insertions(+), 56 deletions(-) diff --git a/aligned/compiler/feature_factory.py b/aligned/compiler/feature_factory.py index aeff6126..aa5f6287 100644 --- a/aligned/compiler/feature_factory.py +++ b/aligned/compiler/feature_factory.py @@ -983,7 +983,9 @@ def __init__(self, ttl: timedelta | None = None): def event_timestamp(self) -> EventTimestampFeature: return EventTimestampFeature( - name=self.name, ttl=self.ttl.total_seconds() if self.ttl else None, description=self._description + name=self.name, + ttl=int(self.ttl.total_seconds()) if self.ttl else None, + description=self._description, ) diff --git a/aligned/data_source/batch_data_source.py b/aligned/data_source/batch_data_source.py index 94a0389b..ba9a49f5 100644 --- a/aligned/data_source/batch_data_source.py +++ b/aligned/data_source/batch_data_source.py @@ -221,6 +221,12 @@ async def freshness(self, event_timestamp: EventTimestamp) -> datetime | None: .freshness() ) """ + from aligned.data_file import DataFileReference + from aligned.sources.local import data_file_freshness + + if isinstance(self, DataFileReference): + return await data_file_freshness(self, event_timestamp.name) + raise NotImplementedError(f'Freshness is not implemented for {type(self)}.') diff --git a/aligned/feature_source.py b/aligned/feature_source.py index ec4da70b..5d2a7019 100644 --- a/aligned/feature_source.py +++ b/aligned/feature_source.py @@ -28,7 +28,7 @@ def features_for(self, facts: RetrivalJob, request: FeatureRequest) -> RetrivalJ async def freshness_for( self, locations: dict[FeatureLocation, EventTimestamp] - ) -> dict[FeatureLocation, datetime]: + ) -> dict[FeatureLocation, datetime | None]: raise NotImplementedError() @@ -136,7 +136,7 @@ def all_between(self, start_date: datetime, end_date: datetime, request: Feature async def freshness_for( self, locations: dict[FeatureLocation, EventTimestamp] - ) -> dict[FeatureLocation, datetime]: + ) -> dict[FeatureLocation, datetime | None]: locs = list(locations.keys()) results = await asyncio.gather( *[self.sources[loc.identifier].freshness(locations[loc]) for loc in locs] diff --git a/aligned/feature_store.py b/aligned/feature_store.py index f034f1cb..4e09a223 100644 --- a/aligned/feature_store.py +++ b/aligned/feature_store.py @@ -289,11 +289,8 @@ def features_for( feature_names = set() - if event_timestamp_column: + if event_timestamp_column and requests.needs_event_timestamp: feature_names.add(event_timestamp_column) - if isinstance(entities, dict) and event_timestamp_column in entities: - length = len(list(entities.values())[0]) - entities[event_timestamp_column] = [datetime.utcnow()] * length for view, feature_set in feature_request.grouped_features.items(): if feature_set != {'*'}: @@ -345,8 +342,6 @@ def _requests_for( requests: list[RetrivalRequest] = [] entity_names = set() - needs_event_timestamp = False - for location in feature_request.locations: location_name = location.name if location.location == 'model': @@ -358,8 +353,6 @@ def _requests_for( request = view.request_for(features[location], location_name) requests.append(request) entity_names.update(request.entity_names) - if request.event_timestamp: - needs_event_timestamp = True elif location_name in combined_feature_views: cfv = combined_feature_views[location_name] @@ -370,8 +363,6 @@ def _requests_for( requests.extend(sub_requests.needed_requests) for request in sub_requests.needed_requests: entity_names.update(request.entity_names) - if request.event_timestamp: - needs_event_timestamp = True elif location_name in feature_views: feature_view = feature_views[location_name] @@ -382,16 +373,18 @@ def _requests_for( requests.extend(sub_requests.needed_requests) for request in sub_requests.needed_requests: entity_names.update(request.entity_names) - if request.event_timestamp: - needs_event_timestamp = True else: raise ValueError( f'Unable to find: {location_name}, ' f'availible views are: {combined_feature_views.keys()}, and: {feature_views.keys()}' ) - if needs_event_timestamp and event_timestamp_column: + if event_timestamp_column: entity_names.add(event_timestamp_column) + requests = [request.with_event_timestamp_column(event_timestamp_column) for request in requests] + + else: + requests = [request.without_event_timestamp() for request in requests] return FeatureRequest( FeatureLocation.model('custom features'), @@ -688,14 +681,14 @@ def features_for( return job.select_columns(request.features_to_include) - async def freshness(self) -> dict[FeatureLocation, datetime]: + async def freshness(self) -> dict[FeatureLocation, datetime | None]: from aligned.schemas.feature import EventTimestamp locs: dict[FeatureLocation, EventTimestamp] = {} for req in self.request().needed_requests: if req.event_timestamp: - locs[req.location] + locs[req.location] = req.event_timestamp return await self.store.feature_source.freshness_for(locs) @@ -782,10 +775,14 @@ def process_features(self, input: RetrivalJob | ConvertableToRetrivalJob) -> Ret .select_columns(request.features_to_include) ) - def predictions_for(self, entities: ConvertableToRetrivalJob | RetrivalJob) -> RetrivalJob: + def predictions_for( + self, entities: ConvertableToRetrivalJob | RetrivalJob, event_timestamp_column: str | None = None + ) -> RetrivalJob: location_id = self.location.identifier - return self.store.features_for(entities, features=[f'{location_id}:*']) + return self.store.features_for( + entities, features=[f'{location_id}:*'], event_timestamp_column=event_timestamp_column + ) def all_predictions(self, limit: int | None = None) -> RetrivalJob: @@ -912,7 +909,9 @@ class SupervisedModelFeatureStore: model: ModelSchema store: FeatureStore - def features_for(self, entities: ConvertableToRetrivalJob | RetrivalJob) -> SupervisedJob: + def features_for( + self, entities: ConvertableToRetrivalJob | RetrivalJob, event_timestamp_column: str | None = None + ) -> SupervisedJob: """Loads the features and labels for a model ```python @@ -956,9 +955,11 @@ def features_for(self, entities: ConvertableToRetrivalJob | RetrivalJob) -> Supe else: raise ValueError('Found no targets in the model') - request = self.store.requests_for(RawStringFeatureRequest(features)) + request = self.store.requests_for( + RawStringFeatureRequest(features), event_timestamp_column=event_timestamp_column + ) target_request = self.store.requests_for( - RawStringFeatureRequest(target_features) + RawStringFeatureRequest(target_features), event_timestamp_column=event_timestamp_column ).without_event_timestamp(name_sufix='target') total_request = FeatureRequest( @@ -972,7 +973,9 @@ def features_for(self, entities: ConvertableToRetrivalJob | RetrivalJob) -> Supe target_columns=targets, ) - def predictions_for(self, entities: ConvertableToRetrivalJob | RetrivalJob) -> RetrivalJob: + def predictions_for( + self, entities: ConvertableToRetrivalJob | RetrivalJob, event_timestamp_column: str | None = None + ) -> RetrivalJob: """Loads the predictions and labels / ground truths for a model ```python @@ -1017,7 +1020,9 @@ def predictions_for(self, entities: ConvertableToRetrivalJob | RetrivalJob) -> R labels = pred_view.labels() target_features = {feature.identifier for feature in target_features} pred_features = {f'model:{self.model.name}:{feature.name}' for feature in labels} - request = self.store.requests_for(RawStringFeatureRequest(pred_features)) + request = self.store.requests_for( + RawStringFeatureRequest(pred_features), event_timestamp_column=event_timestamp_column + ) target_request = self.store.requests_for( RawStringFeatureRequest(target_features) ).without_event_timestamp(name_sufix='target') @@ -1134,12 +1139,17 @@ def previous(self, days: int = 0, minutes: int = 0, seconds: int = 0) -> Retriva start_date = end_date - timedelta(days=days, minutes=minutes, seconds=seconds) return self.between_dates(start_date, end_date) - def features_for(self, entities: ConvertableToRetrivalJob | RetrivalJob) -> RetrivalJob: + def features_for( + self, entities: ConvertableToRetrivalJob | RetrivalJob, event_timestamp_column: str | None = None + ) -> RetrivalJob: request = self.view.request_all if self.feature_filter: request = self.view.request_for(self.feature_filter) + if not event_timestamp_column: + request = request.without_event_timestamp() + if isinstance(entities, RetrivalJob): entity_job = entities else: @@ -1256,7 +1266,7 @@ async def batch_write(self, values: ConvertableToRetrivalJob | RetrivalJob) -> N with feature_view_write_time.labels(self.view.name).time(): await self.source.write(job, job.retrival_requests) - async def freshness(self) -> datetime: + async def freshness(self) -> datetime | None: view = self.view if not view.event_timestamp: diff --git a/aligned/local/job.py b/aligned/local/job.py index aedaa7f5..11f32f3d 100644 --- a/aligned/local/job.py +++ b/aligned/local/job.py @@ -226,10 +226,20 @@ async def file_transformations(self, df: pl.LazyFrame) -> pl.LazyFrame: result = await self.facts.to_polars() event_timestamp_col = 'aligned_event_timestamp' - using_event_timestamp = False - if 'event_timestamp' in result.columns: - using_event_timestamp = True - result = result.rename({'event_timestamp': event_timestamp_col}) + + event_timestamp_entity_columns = [ + req.event_timestamp_request.entity_column for req in self.requests if req.event_timestamp_request + ] + event_timestamp_entity_column = None + did_rename_event_timestamp = False + + if event_timestamp_entity_columns: + event_timestamp_entity_column = event_timestamp_entity_columns[0] + + if event_timestamp_entity_column and event_timestamp_entity_column in result: + result = result.rename({event_timestamp_entity_column: event_timestamp_col}) + did_rename_event_timestamp = True + row_id_name = 'row_id' result = result.with_row_count(row_id_name) @@ -237,10 +247,17 @@ async def file_transformations(self, df: pl.LazyFrame) -> pl.LazyFrame: entity_names = request.entity_names all_names = request.all_required_feature_names.union(entity_names) + if request.event_timestamp_request: + using_event_timestamp = event_timestamp_entity_column is not None + else: + using_event_timestamp = False + if request.event_timestamp: all_names.add(request.event_timestamp.name) - request_features = all_names + all_names = list(all_names) + + request_features = list(all_names) if isinstance(self.source, ColumnFeatureMappable): request_features = self.source.feature_identifier_for(all_names) @@ -259,7 +276,8 @@ async def file_transformations(self, df: pl.LazyFrame) -> pl.LazyFrame: result = result.with_columns(pl.col(entity.name).cast(entity.dtype.polars_type)) column_selects = list(entity_names.union({'row_id'})) - if request.event_timestamp: + + if using_event_timestamp: column_selects.append(event_timestamp_col) # Need to only select the relevent entities and row_id @@ -274,7 +292,7 @@ async def file_transformations(self, df: pl.LazyFrame) -> pl.LazyFrame: aggregated_df = await self.aggregate_over(group, features, new_result, event_timestamp_col) new_result = new_result.join(aggregated_df, on='row_id', how='left') - if request.event_timestamp: + if request.event_timestamp and using_event_timestamp: field = request.event_timestamp.name ttl = request.event_timestamp.ttl @@ -292,13 +310,16 @@ async def file_transformations(self, df: pl.LazyFrame) -> pl.LazyFrame: pl.col(field).is_null() | (pl.col(field) <= pl.col(event_timestamp_col)) ) new_result = new_result.sort(field, descending=True).select(pl.exclude(field)) + elif request.event_timestamp: + new_result = new_result.sort([row_id_name, request.event_timestamp.name], descending=True) unique = new_result.unique(subset=row_id_name, keep='first') - result = result.join(unique, on=row_id_name, how='left') + column_selects.remove('row_id') + result = result.join(unique.select(pl.exclude(column_selects)), on=row_id_name, how='left') result = result.select(pl.exclude('.*_right$')) - if using_event_timestamp: - result = result.rename({event_timestamp_col: 'event_timestamp'}) + if did_rename_event_timestamp: + result = result.rename({event_timestamp_col: event_timestamp_entity_column}) return result.select([pl.exclude('row_id')]) diff --git a/aligned/request/retrival_request.py b/aligned/request/retrival_request.py index 19faf2aa..2cc4a353 100644 --- a/aligned/request/retrival_request.py +++ b/aligned/request/retrival_request.py @@ -10,7 +10,7 @@ class EventTimestampRequest(Codable): event_timestamp: EventTimestamp - entity_column: str = field(default='event_timestamp') + entity_column: str | None = field(default=None) @dataclass @@ -59,8 +59,7 @@ def __init__( self.event_timestamp_request = event_timestamp_request elif event_timestamp: self.event_timestamp_request = EventTimestampRequest( - event_timestamp=event_timestamp, - entity_column=entity_timestamp_columns or 'event_timestamp', + event_timestamp=event_timestamp, entity_column=entity_timestamp_columns ) self.features_to_include = features_to_include or self.all_feature_names @@ -162,6 +161,11 @@ def aggregate_over(self) -> dict[AggregateOver, set[AggregatedFeature]]: return features def without_event_timestamp(self, name_sufix: str | None = None) -> 'RetrivalRequest': + + request = None + if self.event_timestamp_request: + request = EventTimestampRequest(self.event_timestamp_request.event_timestamp, None) + return RetrivalRequest( name=f'{self.name}{name_sufix or ""}', location=self.location, @@ -169,6 +173,21 @@ def without_event_timestamp(self, name_sufix: str | None = None) -> 'RetrivalReq features=self.features, derived_features=self.derived_features, aggregated_features=self.aggregated_features, + event_timestamp_request=request, + ) + + def with_event_timestamp_column(self, column: str) -> 'RetrivalRequest': + et_request = None + if self.event_timestamp_request: + et_request = EventTimestampRequest(self.event_timestamp_request.event_timestamp, column) + return RetrivalRequest( + name=self.name, + location=self.location, + entities=self.entities, + features=self.features, + derived_features=self.derived_features, + aggregated_features=self.aggregated_features, + event_timestamp_request=et_request, ) @staticmethod @@ -187,7 +206,7 @@ def combine(requests: list['RetrivalRequest']) -> list['RetrivalRequest']: features=request.features, derived_features=request.derived_features, aggregated_features=request.aggregated_features, - event_timestamp=request.event_timestamp, + event_timestamp_request=request.event_timestamp_request, ) returned_features[fv_name] = request.returned_features else: diff --git a/aligned/retrival_job.py b/aligned/retrival_job.py index 34d48ee7..2a95d773 100644 --- a/aligned/retrival_job.py +++ b/aligned/retrival_job.py @@ -1189,6 +1189,7 @@ async def to_polars(self) -> pl.LazyFrame: features_to_check = {feature.derived_feature for feature in request.aggregated_features} for feature in features_to_check: + if feature.dtype == FeatureType('').bool: df = df.with_columns(pl.col(feature.name).cast(pl.Int8).cast(pl.Boolean)) elif feature.dtype == FeatureType('').datetime: @@ -1213,13 +1214,14 @@ async def to_polars(self) -> pl.LazyFrame: if feature.name not in df.columns: continue current_dtype = df.select([feature.name]).dtypes[0] - if isinstance(current_dtype, pl.Datetime): - continue - df = df.with_columns( - (pl.col(feature.name).cast(pl.Int64) * 1000) - .cast(pl.Datetime(time_zone='UTC')) - .alias(feature.name) - ) + + if not isinstance(current_dtype, pl.Datetime): + df = df.with_columns( + (pl.col(feature.name).cast(pl.Int64) * 1000) + .cast(pl.Datetime(time_zone='UTC')) + .alias(feature.name) + ) + return df def remove_derived_features(self) -> RetrivalJob: diff --git a/aligned/sources/local.py b/aligned/sources/local.py index 1c1090db..4427a72f 100644 --- a/aligned/sources/local.py +++ b/aligned/sources/local.py @@ -59,9 +59,12 @@ async def as_repo_definition(self) -> RepoDefinition: return RepoDefinition.from_json(file) -async def data_file_freshness(reference: DataFileReference, column_name: str) -> datetime: - file = await reference.to_polars() - return file.select(column_name).max().collect()[0, column_name] +async def data_file_freshness(reference: DataFileReference, column_name: str) -> datetime | None: + try: + file = await reference.to_polars() + return file.select(column_name).max().collect()[0, column_name] + except UnableToFindFileException: + return None @dataclass @@ -192,7 +195,7 @@ async def feature_view_code(self, view_name: str) -> str: 'from aligned import FileSource\nfrom aligned.sources.local import CsvConfig', ) - async def freshness(self, event_timestamp: EventTimestamp) -> datetime: + async def freshness(self, event_timestamp: EventTimestamp) -> datetime | None: return await data_file_freshness(self, event_timestamp.name) diff --git a/aligned/sources/tests/test_parquet.py b/aligned/sources/tests/test_parquet.py index 3bb7bdd7..341e06f9 100644 --- a/aligned/sources/tests/test_parquet.py +++ b/aligned/sources/tests/test_parquet.py @@ -4,6 +4,32 @@ from conftest import DataTest +@pytest.mark.asyncio +async def test_read_parquet(point_in_time_data_test: DataTest) -> None: + + store = FeatureStore.experimental() + + for source in point_in_time_data_test.sources: + view = source.view + view_name = view.metadata.name + if '_agg' in view_name: + continue + + file_source = FileSource.parquet_at(f'test_data/{view_name}.parquet') + await file_source.write_polars(source.data.lazy()) + + view.metadata = FeatureView.metadata_with( # type: ignore + name=view.metadata.name, + description=view.metadata.description, + batch_source=file_source, + ) + store.add_feature_view(view) + + stored = await store.feature_view(view.metadata.name).all().to_polars() + df = stored.select(source.data.columns).collect() + assert df.frame_equal(source.data) + + @pytest.mark.asyncio async def test_parquest(point_in_time_data_test: DataTest) -> None: @@ -24,7 +50,9 @@ async def test_parquest(point_in_time_data_test: DataTest) -> None: store.add_feature_view(view) job = store.features_for( - point_in_time_data_test.entities.to_dict(as_series=False), point_in_time_data_test.feature_reference + point_in_time_data_test.entities, + point_in_time_data_test.feature_reference, + event_timestamp_column='event_timestamp', ) data = (await job.to_polars()).collect() @@ -35,3 +63,39 @@ async def test_parquest(point_in_time_data_test: DataTest) -> None: ordered_columns = data.select(expected.columns) assert ordered_columns.frame_equal(expected), f'Expected: {expected}\nGot: {ordered_columns}' + + +@pytest.mark.asyncio +async def test_parquet_without_event_timestamp( + point_in_time_data_test_wituout_event_timestamp: DataTest, +) -> None: + + store = FeatureStore.experimental() + + for source in point_in_time_data_test_wituout_event_timestamp.sources: + view = source.view + view_name = view.metadata.name + + file_source = FileSource.parquet_at(f'test_data/{view_name}.parquet') + await file_source.write_polars(source.data.lazy()) + + view.metadata = FeatureView.metadata_with( # type: ignore + name=view.metadata.name, + description=view.metadata.description, + batch_source=file_source, + ) + store.add_feature_view(view) + + job = store.features_for( + point_in_time_data_test_wituout_event_timestamp.entities, + point_in_time_data_test_wituout_event_timestamp.feature_reference, + ) + data = (await job.to_polars()).collect() + + expected = point_in_time_data_test_wituout_event_timestamp.expected_output + + assert expected.shape == data.shape, f'Expected: {expected.shape}\nGot: {data.shape}' + assert set(expected.columns) == set(data.columns), f'Expected: {expected.columns}\nGot: {data.columns}' + + ordered_columns = data.select(expected.columns) + assert ordered_columns.frame_equal(expected), f'Expected: {expected}\nGot: {ordered_columns}' diff --git a/aligned/sources/tests/test_psql.py b/aligned/sources/tests/test_psql.py index 6c971a91..2b5c375c 100644 --- a/aligned/sources/tests/test_psql.py +++ b/aligned/sources/tests/test_psql.py @@ -38,7 +38,9 @@ async def test_postgresql(point_in_time_data_test: DataTest, psql: PostgreSQLCon store.add_feature_view(view) job = store.features_for( - point_in_time_data_test.entities.to_dict(as_series=False), point_in_time_data_test.feature_reference + point_in_time_data_test.entities.to_dict(as_series=False), + point_in_time_data_test.feature_reference, + event_timestamp_column='event_timestamp_column', ) data = (await job.to_polars()).collect() @@ -83,3 +85,43 @@ async def test_postgresql_write(titanic_feature_store: FeatureStore, psql: Postg check_column_order=False, check_dtype=False, ) + + +@pytest.mark.skipif( + platform.uname().machine.startswith('arm'), reason='Needs psycopg2 which is not supported on arm' +) +@pytest.mark.asyncio +async def test_postgresql_without_event( + point_in_time_data_test_wituout_event_timestamp: DataTest, psql: PostgreSQLConfig +) -> None: + + psql_database = environ['PSQL_DATABASE_TEST'] + + store = FeatureStore.experimental() + + for source in point_in_time_data_test_wituout_event_timestamp.sources: + view = source.view + db_name = view.metadata.name + source.data.to_pandas().to_sql(db_name, psql_database, if_exists='replace') + + view.metadata = FeatureView.metadata_with( # type: ignore + name=view.metadata.name, + description=view.metadata.description, + batch_source=psql.table(db_name), + ) + store.add_feature_view(view) + + job = store.features_for( + point_in_time_data_test_wituout_event_timestamp.entities.to_dict(as_series=False), + point_in_time_data_test_wituout_event_timestamp.feature_reference, + event_timestamp_column='event_timestamp_column', + ) + data = (await job.to_polars()).collect() + + expected = point_in_time_data_test_wituout_event_timestamp.expected_output + + assert expected.shape == data.shape, f'Expected: {expected.shape}\nGot: {data.shape}' + assert set(expected.columns) == set(data.columns), f'Expected: {expected.columns}\nGot: {data.columns}' + + ordered_columns = data.select(expected.columns) + assert ordered_columns.frame_equal(expected), f'Expected: {expected}\nGot: {ordered_columns}' diff --git a/aligned/tests/test_model_target.py b/aligned/tests/test_model_target.py index 320a3a39..a0c7f623 100644 --- a/aligned/tests/test_model_target.py +++ b/aligned/tests/test_model_target.py @@ -55,7 +55,7 @@ async def test_titanic_model_with_targets_and_scd(titanic_feature_store_scd: Fea dataset = ( await titanic_feature_store_scd.model('titanic') .with_labels() - .features_for(entities.to_dict(as_series=False)) + .features_for(entities.to_dict(as_series=False), event_timestamp_column='event_timestamp') .to_polars() ) diff --git a/conftest.py b/conftest.py index 50cfa039..6c550aa7 100644 --- a/conftest.py +++ b/conftest.py @@ -803,3 +803,124 @@ class Loan(FeatureView): ], expected_output=expected_output, ) + + +@pytest.fixture +def point_in_time_data_test_wituout_event_timestamp() -> DataTest: + from datetime import datetime, timezone + + placeholder_ds = FileSource.parquet_at('placeholder') + + class CreditHistory(FeatureView): + + metadata = FeatureView.metadata_with('credit_history', description='', batch_source=placeholder_ds) + + dob_ssn = String().as_entity() + event_timestamp = EventTimestamp() + credit_card_due = Int64() + student_loan_due = Int64() + + due_sum = credit_card_due + student_loan_due + + bankruptcies = Int32() + + class CreditHistoryAggregate(FeatureView): + + metadata = FeatureView.metadata_with( + 'credit_history_agg', description='', batch_source=placeholder_ds + ) + + dob_ssn = String().as_entity() + event_timestamp = EventTimestamp() + credit_card_due = Int64() + + credit_sum = credit_card_due.aggregate().over(weeks=1).sum() + + class Loan(FeatureView): + + metadata = FeatureView.metadata_with('loan', description='', batch_source=placeholder_ds) + + loan_id = Int32().as_entity() + event_timestamp = EventTimestamp() + loan_status = Bool().description('If the loan was granted or not') + personal_income = Int64() + loan_amount = Int64() + + first_event_timestamp = datetime(2020, 4, 26, 18, 1, 4, 746575, tzinfo=timezone.utc) + second_event_timestamp = datetime(2020, 4, 27, 18, 1, 4, 746575, tzinfo=timezone.utc) + + credit_data = pl.DataFrame( + { + 'dob_ssn': [ + '19530219_5179', + '19520816_8737', + '19860413_2537', + '19530219_5179', + '19520816_8737', + '19860413_2537', + ], + 'event_timestamp': [ + first_event_timestamp, + first_event_timestamp, + first_event_timestamp, + second_event_timestamp, + second_event_timestamp, + second_event_timestamp, + ], + 'credit_card_due': [8419, 2944, 833, 5936, 1575, 6263], + 'student_loan_due': [22328, 2515, 33000, 48955, 9501, 35510], + 'bankruptcies': [0, 0, 0, 0, 0, 0], + } + ) + + loan_data = pl.DataFrame( + { + 'loan_id': [10000, 10001, 10002, 10000, 10001, 10002], + 'event_timestamp': [ + first_event_timestamp, + first_event_timestamp, + first_event_timestamp, + second_event_timestamp, + second_event_timestamp, + second_event_timestamp, + ], + 'loan_status': [1, 0, 1, 1, 1, 1], + 'personal_income': [59000, 9600, 9600, 65500, 54400, 9900], + 'loan_amount': [35000, 1000, 5500, 35000, 35000, 2500], + } + ) + + entities = pl.DataFrame( + { + 'dob_ssn': ['19530219_5179', '19520816_8737', '19860413_2537'], + 'loan_id': [10000, 10001, 10002], + } + ) + + expected_output = pl.DataFrame( + { + 'dob_ssn': ['19530219_5179', '19520816_8737', '19860413_2537'], + 'loan_id': [10000, 10001, 10002], + 'credit_card_due': [5936, 1575, 6263], + # 'credit_sum': [8419 + 5936, 2944 + 1575, 833 + 6263], + 'student_loan_due': [48955, 9501, 35510], + 'due_sum': [5936 + 48955, 1575 + 9501, 6263 + 35510], + 'personal_income': [65500, 54400, 9900], + } + ) + + return DataTest( + sources=[ + FeatureData(data=credit_data, view=CreditHistory()), + FeatureData(data=loan_data, view=Loan()), + FeatureData(data=credit_data, view=CreditHistoryAggregate()), + ], + entities=entities, + feature_reference=[ + 'credit_history:credit_card_due', + 'credit_history:student_loan_due', + 'credit_history:due_sum', + 'loan:personal_income', + ], + expected_output=expected_output, + ) diff --git a/test_data/feature-store.json b/test_data/feature-store.json index 0e7205f5..e43846a2 100644 --- a/test_data/feature-store.json +++ b/test_data/feature-store.json @@ -1 +1 @@ -{"metadata": {"created_at": "2023-10-18T17:02:45.564875", "name": "feature_store_location.py", "github_url": null}, "feature_views": [{"name": "titanic", "tags": {}, "batch_data_source": {"mapping_keys": {"PassengerId": "passenger_id", "Age": "age", "Sex": "sex", "Survived": "survived", "SibSp": "sibsp", "UpdatedAt": "updated_at"}, "type_name": "csv", "path": "test_data/titanic_scd_data.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "requierd"}, {"name": "upper_bound", "value": 100.0}, {"name": "lower_bound", "value": 0.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "updated_at", "dtype": {"name": "datetime"}, "description": null, "tags": null, "constraints": null}], "derived_features": [{"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "word_vectoriser", "dtype": {"name": "embedding"}, "key": "name", "model": {"name": "gensim", "model_name": "glove-wiki-gigaword-50", "config": {"to_lowercase": false, "deaccent": false, "encoding": "utf8", "errors": "strict"}, "loaded_model": null}}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "double_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul_val", "dtype": {"name": "float"}, "key": "sibsp", "value": {"name": "int", "value": 2}}, "depth": 1}, {"name": "square_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}, {"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul", "dtype": {"name": "float"}, "front": "sibsp", "behind": "sibsp"}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}], "description": "Some features from the titanic dataset", "aggregated_features": [], "event_timestamp": {"name": "updated_at", "ttl": null, "description": null, "tags": null, "dtype": {"name": "datetime"}}, "stream_data_source": {"mapping_keys": {}, "name": "redis", "topic_name": "titanic_stream", "config": {"env_var": "REDIS_URL"}, "record_coder": {"coder_type": "json", "key": "json"}}, "application_source": null, "staging_source": null, "event_triggers": null, "contacts": null, "indexes": [{"location": {"name": "titanic", "location": "feature_view"}, "vector": {"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null}, "vector_dim": 50, "metadata": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "requierd"}, {"name": "upper_bound", "value": 100.0}, {"name": "lower_bound", "value": 0.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}], "storage": {"type_name": "redis", "config": {"env_var": "REDIS_URL"}, "name": "name_embedding_index", "initial_cap": 10000, "distance_metric": "COSINE", "index_alogrithm": "FLAT", "embedding_type": "FLOAT32"}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}]}]}, {"name": "titanic_parquet", "tags": {}, "batch_data_source": {"mapping_keys": {}, "type_name": "parquet", "path": "test_data/titanic.parquet", "config": {"engine": "auto", "compression": "snappy", "should_write_index": false}}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "requierd"}, {"name": "upper_bound", "value": 100.0}, {"name": "lower_bound", "value": 0.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}], "derived_features": [{"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}], "description": "Some features from the titanic dataset", "aggregated_features": [], "event_timestamp": null, "stream_data_source": null, "application_source": null, "staging_source": null, "event_triggers": null, "contacts": null, "indexes": []}], "combined_feature_views": [], "models": [{"name": "titanic", "features": [{"name": "is_male", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, {"name": "has_siblings", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, {"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}, {"name": "age", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "float"}}], "predictions_view": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "probability", "dtype": {"name": "float"}, "description": "The probability of target named will_survive being 'True'.", "tags": null, "constraints": null}], "derived_features": [{"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "probability", "location": {"name": "titanic", "location": "model"}, "dtype": {"name": "float"}}], "transformation": {"name": "map_arg_max", "dtype": {"name": "bool"}, "column_mappings": {"probability": {"name": "bool", "value": true}}}, "depth": 1}], "model_version_column": null, "event_timestamp": null, "source": null, "stream_source": null, "regression_targets": [], "classification_targets": [{"estimating": {"name": "survived", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, "feature": {"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null}, "on_ground_truth_event": null, "event_trigger": null, "class_probabilities": [{"outcome": {"name": "bool", "value": true}, "feature": {"name": "probability", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null}}], "confidence": null}]}, "description": "A model predicting if a passenger will survive", "contacts": null, "tags": null, "dataset_folder": null}], "enrichers": []} +{"metadata": {"created_at": "2023-11-02T08:02:25.567517", "name": "feature_store_location.py", "github_url": null}, "feature_views": [{"name": "titanic_parquet", "tags": {}, "batch_data_source": {"mapping_keys": {}, "type_name": "parquet", "path": "test_data/titanic.parquet", "config": {"engine": "auto", "compression": "snappy", "should_write_index": false}}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound", "value": 100.0}, {"name": "lower_bound", "value": 0.0}, {"name": "requierd"}]}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}], "derived_features": [{"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic_parquet", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}], "description": "Some features from the titanic dataset", "aggregated_features": [], "event_timestamp": null, "stream_data_source": null, "application_source": null, "staging_source": null, "event_triggers": null, "contacts": null, "indexes": []}, {"name": "titanic", "tags": {}, "batch_data_source": {"mapping_keys": {"PassengerId": "passenger_id", "Age": "age", "Sex": "sex", "Survived": "survived", "SibSp": "sibsp", "UpdatedAt": "updated_at"}, "type_name": "csv", "path": "test_data/titanic_scd_data.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "updated_at", "dtype": {"name": "datetime"}, "description": null, "tags": null, "constraints": null}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound", "value": 100.0}, {"name": "lower_bound", "value": 0.0}, {"name": "requierd"}]}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": null}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}], "derived_features": [{"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "word_vectoriser", "dtype": {"name": "embedding"}, "key": "name", "model": {"name": "gensim", "model_name": "glove-wiki-gigaword-50", "config": {"to_lowercase": false, "deaccent": false, "encoding": "utf8", "errors": "strict"}, "loaded_model": null}}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "double_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul_val", "dtype": {"name": "float"}, "key": "sibsp", "value": {"name": "int", "value": 2}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "square_sibsp", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}, {"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "mul", "dtype": {"name": "float"}, "front": "sibsp", "behind": "sibsp"}, "depth": 1}], "description": "Some features from the titanic dataset", "aggregated_features": [], "event_timestamp": {"name": "updated_at", "ttl": null, "description": null, "tags": null, "dtype": {"name": "datetime"}}, "stream_data_source": {"mapping_keys": {}, "name": "redis", "topic_name": "titanic_stream", "config": {"env_var": "REDIS_URL"}, "record_coder": {"coder_type": "json", "key": "json"}}, "application_source": null, "staging_source": null, "event_triggers": null, "contacts": null, "indexes": [{"location": {"name": "titanic", "location": "feature_view"}, "vector": {"name": "name_embedding", "dtype": {"name": "embedding"}, "description": null, "tags": null, "constraints": null}, "vector_dim": 50, "metadata": [{"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound", "value": 100.0}, {"name": "lower_bound", "value": 0.0}, {"name": "requierd"}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "in_domain", "values": ["male", "female"]}]}], "storage": {"type_name": "redis", "config": {"env_var": "REDIS_URL"}, "name": "name_embedding_index", "initial_cap": 10000, "distance_metric": "COSINE", "index_alogrithm": "FLAT", "embedding_type": "FLOAT32"}, "entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}]}]}], "combined_feature_views": [], "models": [{"name": "titanic", "features": [{"name": "is_male", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, {"name": "has_siblings", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, {"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}, {"name": "age", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "float"}}], "predictions_view": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "probability", "dtype": {"name": "float"}, "description": "The probability of target named will_survive being 'True'.", "tags": null, "constraints": null}], "derived_features": [{"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "probability", "location": {"name": "titanic", "location": "model"}, "dtype": {"name": "float"}}], "transformation": {"name": "map_arg_max", "dtype": {"name": "bool"}, "column_mappings": {"probability": {"name": "bool", "value": true}}}, "depth": 1}], "model_version_column": null, "event_timestamp": null, "source": null, "stream_source": null, "regression_targets": [], "classification_targets": [{"estimating": {"name": "survived", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "bool"}}, "feature": {"name": "will_survive", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null}, "on_ground_truth_event": null, "event_trigger": null, "class_probabilities": [{"outcome": {"name": "bool", "value": true}, "feature": {"name": "probability", "dtype": {"name": "float"}, "description": null, "tags": null, "constraints": null}}], "confidence": null}]}, "description": "A model predicting if a passenger will survive", "contacts": null, "tags": null, "dataset_folder": null}], "enrichers": []}