Skip to content

Commit

Permalink
Added feature versions
Browse files Browse the repository at this point in the history
  • Loading branch information
MatsMoll committed Dec 25, 2023
1 parent de181f1 commit acfba6f
Show file tree
Hide file tree
Showing 13 changed files with 183 additions and 114 deletions.
3 changes: 2 additions & 1 deletion aligned/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
Timestamp,
CustomAggregation,
)
from aligned.compiler.model import model_contract
from aligned.compiler.model import model_contract, FeatureInputVersions
from aligned.data_source.stream_data_source import HttpStreamSource
from aligned.feature_store import FeatureStore
from aligned.feature_view import (
Expand Down Expand Up @@ -61,4 +61,5 @@
'CustomAggregation',
# Schemas
'FeatureLocation',
'FeatureInputVersions',
]
26 changes: 23 additions & 3 deletions aligned/compiler/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from aligned.schemas.folder import DatasetStore, JsonDatasetStore
from aligned.schemas.literal_value import LiteralValue
from aligned.schemas.model import Model as ModelSchema
from aligned.schemas.model import FeatureInputVersions as FeatureVersionSchema
from aligned.schemas.model import PredictionsView
from aligned.schemas.target import ClassificationTarget as ClassificationTargetSchema
from aligned.schemas.target import RegressionTarget as RegressionTargetSchema
Expand All @@ -41,7 +42,7 @@
@dataclass
class ModelMetadata:
name: str
features: list[FeatureReferencable]
features: list[FeatureReferencable] | FeatureInputVersions
# Will log the feature inputs to a model. Therefore, enabling log and wait etc.
# feature_logger: WritableBatchSource | None = field(default=None)
contacts: list[str] | None = field(default=None)
Expand Down Expand Up @@ -120,9 +121,25 @@ def resolve_dataset_store(dataset_store: DatasetStore | StorageFileReference) ->
return JsonDatasetStore(dataset_store)


@dataclass
class FeatureInputVersions:
    """Named sets of input features for a model contract.

    ``versions`` maps a version name to the features used by that version,
    and ``default_version`` holds a single version name.
    """

    default_version: str
    versions: dict[str, list[FeatureReferencable]]

    def compile(self) -> FeatureVersionSchema:
        """Build the schema counterpart of this object.

        Every feature is converted with ``feature_referance()``; the version
        names and ``default_version`` are passed through unchanged.
        """
        compiled_versions = {}
        for version_name, version_features in self.versions.items():
            compiled_versions[version_name] = [
                feature.feature_referance() for feature in version_features
            ]

        return FeatureVersionSchema(
            default_version=self.default_version,
            versions=compiled_versions,
        )


def model_contract(
name: str,
features: list[FeatureReferencable],
features: list[FeatureReferencable] | FeatureInputVersions,
contacts: list[str] | None = None,
tags: dict[str, str] | None = None,
description: str | None = None,
Expand Down Expand Up @@ -240,7 +257,10 @@ class MyModel(ModelContract):
inference_view.features.add(feature.feature())

# Needs to run after the feature views have compiled
features: set[FeatureReferance] = {feature.feature_referance() for feature in metadata.features}
if isinstance(metadata.features, FeatureInputVersions):
features = metadata.features.compile()
else:
features = {feature.feature_referance() for feature in metadata.features}

for target, probabilities in probability_features.items():
from aligned.schemas.transformation import MapArgMax
Expand Down
14 changes: 13 additions & 1 deletion aligned/feature_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from prometheus_client import Histogram

from aligned.compiler.model import ModelContractWrapper
from aligned.schemas.model import FeatureInputVersions
from aligned.data_file import DataFileReference, upsert_on_column
from aligned.data_source.batch_data_source import BatchDataSource
from aligned.enricher import Enricher
Expand Down Expand Up @@ -724,18 +725,29 @@ class ModelFeatureStore:

model: ModelSchema
store: FeatureStore
selected_version: str | None = None

@property
def location(self) -> FeatureLocation:
return FeatureLocation.model(self.model.name)

def raw_string_features(self, except_features: set[str]) -> set[str]:
    """Return the model's input features as ``'<location identifier>:<name>'`` strings.

    When the model declares versioned inputs, the features stored for
    ``selected_version`` are used; if ``selected_version`` is unset (or
    empty) the model's default version is used instead. Otherwise the
    model's plain feature collection is used.

    Args:
        except_features: names of features to leave out of the result.

    Returns:
        One string per remaining feature.
    """
    model_features = self.model.features

    if isinstance(model_features, FeatureInputVersions):
        version = self.selected_version or model_features.default_version
        features = model_features.features_for(version)
    else:
        features = model_features

    # Iterate only the resolved ``features``: also looping over
    # ``self.model.features`` would bypass the version selection above.
    return {
        f'{feature.location.identifier}:{feature.name}'
        for feature in features
        if feature.name not in except_features
    }

def using_version(self, version: str) -> ModelFeatureStore:
    """Return a new ``ModelFeatureStore`` for the same model and store with ``version`` selected."""
    versioned_store = ModelFeatureStore(self.model, self.store, version)
    return versioned_store

def request(
self, except_features: set[str] | None = None, event_timestamp_column: str | None = None
) -> FeatureRequest:
Expand Down
16 changes: 15 additions & 1 deletion aligned/schemas/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,20 @@
logger = logging.getLogger(__name__)


@dataclass
class FeatureInputVersions(Codable):
    """Versioned feature inputs for a model.

    ``versions`` maps a version name to its feature references, and
    ``default_version`` names the version returned by ``default_features``.
    """

    default_version: str
    versions: dict[str, list[FeatureReferance]]

    def features_for(self, version: str) -> list[FeatureReferance]:
        """Return the features stored under ``version``, or an empty list if it is unknown."""
        if version in self.versions:
            return self.versions[version]
        return []

    @property
    def default_features(self) -> list[FeatureReferance]:
        """The features stored under ``default_version``."""
        default_name = self.default_version
        return self.features_for(default_name)


@dataclass
class Target(Codable):
estimating: FeatureReferance
Expand Down Expand Up @@ -111,7 +125,7 @@ def labels(self) -> set[Feature]:
@dataclass
class Model(Codable):
name: str
features: set[FeatureReferance]
features: set[FeatureReferance] | FeatureInputVersions
predictions_view: PredictionsView
description: str | None = field(default=None)
contacts: list[str] | None = field(default=None)
Expand Down
26 changes: 24 additions & 2 deletions aligned/tests/test_models_as_feature.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from aligned import Bool, FeatureStore, FileSource, Int32, String
from aligned.feature_view.feature_view import feature_view
from aligned.compiler.model import model_contract
from aligned.compiler.model import FeatureInputVersions, model_contract
from aligned.schemas.feature import FeatureLocation


Expand All @@ -25,7 +25,16 @@ class OtherView:
other = OtherView()


@model_contract('test_model', features=[view.feature_a, other.feature_b])
@model_contract(
'test_model',
features=FeatureInputVersions(
default_version='v1',
versions={
'v1': [view.feature_a, other.feature_b],
'v2': [view.feature_a, other.feature_b, other.is_true],
},
),
)
class First:

target = other.is_true.as_classification_label()
Expand All @@ -43,6 +52,7 @@ class Second:
def test_model_referenced_as_feature() -> None:
model = Second.compile() # type: ignore

assert isinstance(model.features, set)
feature = list(model.features)[0]

assert feature.location == FeatureLocation.model('test_model')
Expand All @@ -60,3 +70,15 @@ def test_model_request() -> None:

model_request = store.model('test_model').request()
assert model_request.features_to_include == {'feature_a', 'feature_b', 'view_id', 'other_id'}


def test_model_version() -> None:
    """Requesting version ``v2`` of ``test_model`` should also include ``is_true``."""
    store = FeatureStore.experimental()
    for view_cls in (View, OtherView):
        store.add_feature_view(view_cls)  # type: ignore
    store.add_model(First)

    assert len(store.feature_views) == 2

    versioned_model = store.model('test_model').using_version('v2')
    model_request = versioned_model.request()

    expected_features = {'feature_a', 'is_true', 'feature_b', 'view_id', 'other_id'}
    assert model_request.features_to_include == expected_features
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "aligned"
version = "0.0.55"
version = "0.0.56"
description = "A data management and lineage tool for ML applications."
authors = ["Mats E. Mollestad <[email protected]>"]
license = "Apache-2.0"
Expand Down
Binary file modified test_data/credit_history_mater.parquet
Binary file not shown.
2 changes: 1 addition & 1 deletion test_data/feature-store.json

Large diffs are not rendered by default.

Binary file modified test_data/test_model.parquet
Binary file not shown.
2 changes: 1 addition & 1 deletion test_data/titanic-sets.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"raw_data": [], "train_test": [], "train_test_validation": [{"id": "titanic_test", "name": null, "request_result": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}, {"name": "in_domain", "values": ["male", "female"]}]}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 0.0}, {"name": "upper_bound_inc", "value": 100.0}]}, {"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "lower_bound_inc", "value": 
0.0}, {"name": "optional"}, {"name": "upper_bound_inc", "value": 20.0}]}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}], "event_timestamp": null}, "train_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-train.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "test_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-test.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "validation_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-validate.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "train_size_fraction": 0.6, "test_size_fraction": 0.20000000000000007, "validate_size_fraction": 0.19999999999999996, "target": ["survived"], "description": null, "tags": null}], "active_learning": []}
{"raw_data": [], "train_test": [], "train_test_validation": [{"id": "titanic_test", "name": null, "request_result": {"entities": [{"name": "passenger_id", "dtype": {"name": "int32"}, "description": null, "tags": null, "constraints": null}], "features": [{"name": "sibsp", "dtype": {"name": "int32"}, "description": "Number of siblings on titanic", "tags": null, "constraints": [{"name": "optional"}, {"name": "upper_bound_inc", "value": 20.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "is_male", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "male"}}, "depth": 1}, {"name": "age", "dtype": {"name": "float"}, "description": "A float as some have decimals", "tags": null, "constraints": [{"name": "upper_bound_inc", "value": 100.0}, {"name": "lower_bound_inc", "value": 0.0}]}, {"name": "name", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}]}, {"name": "is_mr", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "name", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "contains", "dtype": {"name": "bool"}, "key": "name", "value": "Mr."}, "depth": 1}, {"name": "has_siblings", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sibsp", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "int32"}}], "transformation": {"name": "not-equals", "dtype": {"name": "bool"}, "key": "sibsp", "value": {"name": "int", "value": 0}}, "depth": 1}, {"name": "cabin", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": 
[{"name": "optional"}]}, {"name": "is_female", "dtype": {"name": "bool"}, "description": null, "tags": null, "constraints": null, "depending_on": [{"name": "sex", "location": {"name": "titanic", "location": "feature_view"}, "dtype": {"name": "string"}}], "transformation": {"name": "equals", "dtype": {"name": "bool"}, "key": "sex", "value": {"name": "string", "value": "female"}}, "depth": 1}, {"name": "sex", "dtype": {"name": "string"}, "description": null, "tags": null, "constraints": [{"name": "optional"}, {"name": "in_domain", "values": ["male", "female"]}]}, {"name": "survived", "dtype": {"name": "bool"}, "description": "If the passenger survived", "tags": null, "constraints": null}], "event_timestamp": null}, "train_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-train.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "test_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-test.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "validation_dataset": {"mapping_keys": {}, "type_name": "csv", "path": "test_data/titanic-validate.csv", "csv_config": {"seperator": ",", "compression": "infer", "should_write_index": false}}, "train_size_fraction": 0.6, "test_size_fraction": 0.20000000000000007, "validate_size_fraction": 0.19999999999999996, "target": ["survived"], "description": null, "tags": null}], "active_learning": []}
42 changes: 21 additions & 21 deletions test_data/titanic-test.csv
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
name,sex,is_female,is_mr,is_male,age,sibsp,has_siblings,passenger_id,cabin,survived
"Sirayanian, Mr. Orsen",male,False,True,True,22.0,0,False,61,,False
"Icard, Miss. Amelie",female,True,False,False,38.0,0,False,62,B28,True
"Harris, Mr. Henry Birkhardt",male,False,True,True,45.0,1,True,63,C83,False
"Skoog, Master. Harald",male,False,False,True,4.0,3,True,64,,False
"Stewart, Mr. Albert A",male,False,True,True,,0,False,65,,False
"Moubarek, Master. Gerios",male,False,False,True,,1,True,66,,True
"Nye, Mrs. (Elizabeth Ramell)",female,True,True,False,29.0,0,False,67,F33,True
"Crease, Mr. Ernest James",male,False,True,True,19.0,0,False,68,,False
"Andersson, Miss. Erna Alexandra",female,True,False,False,17.0,4,True,69,,True
"Kink, Mr. Vincenz",male,False,True,True,26.0,2,True,70,,False
"Jenkin, Mr. Stephen Curnow",male,False,True,True,32.0,0,False,71,,False
"Goodwin, Miss. Lillian Amy",female,True,False,False,16.0,5,True,72,,False
"Hood, Mr. Ambrose Jr",male,False,True,True,21.0,0,False,73,,False
"Chronopoulos, Mr. Apostolos",male,False,True,True,26.0,1,True,74,,False
"Bing, Mr. Lee",male,False,True,True,32.0,0,False,75,,True
"Moen, Mr. Sigurd Hansen",male,False,True,True,25.0,0,False,76,F G73,False
"Staneff, Mr. Ivan",male,False,True,True,,0,False,77,,False
"Moutal, Mr. Rahamin Haim",male,False,True,True,,0,False,78,,False
"Caldwell, Master. Alden Gates",male,False,False,True,0.83,0,False,79,,True
"Dowdell, Miss. Elizabeth",female,True,False,False,30.0,0,False,80,,True
is_male,age,name,is_mr,has_siblings,cabin,is_female,passenger_id,survived,sex,sibsp
True,22.0,"Sirayanian, Mr. Orsen",True,False,,False,61,False,male,0
False,38.0,"Icard, Miss. Amelie",False,False,B28,True,62,True,female,0
True,45.0,"Harris, Mr. Henry Birkhardt",True,True,C83,False,63,False,male,1
True,4.0,"Skoog, Master. Harald",False,True,,False,64,False,male,3
True,,"Stewart, Mr. Albert A",True,False,,False,65,False,male,0
True,,"Moubarek, Master. Gerios",False,True,,False,66,True,male,1
False,29.0,"Nye, Mrs. (Elizabeth Ramell)",True,False,F33,True,67,True,female,0
True,19.0,"Crease, Mr. Ernest James",True,False,,False,68,False,male,0
False,17.0,"Andersson, Miss. Erna Alexandra",False,True,,True,69,True,female,4
True,26.0,"Kink, Mr. Vincenz",True,True,,False,70,False,male,2
True,32.0,"Jenkin, Mr. Stephen Curnow",True,False,,False,71,False,male,0
False,16.0,"Goodwin, Miss. Lillian Amy",False,True,,True,72,False,female,5
True,21.0,"Hood, Mr. Ambrose Jr",True,False,,False,73,False,male,0
True,26.0,"Chronopoulos, Mr. Apostolos",True,True,,False,74,False,male,1
True,32.0,"Bing, Mr. Lee",True,False,,False,75,True,male,0
True,25.0,"Moen, Mr. Sigurd Hansen",True,False,F G73,False,76,False,male,0
True,,"Staneff, Mr. Ivan",True,False,,False,77,False,male,0
True,,"Moutal, Mr. Rahamin Haim",True,False,,False,78,False,male,0
True,0.83,"Caldwell, Master. Alden Gates",False,False,,False,79,True,male,0
False,30.0,"Dowdell, Miss. Elizabeth",False,False,,True,80,True,female,0
Loading

0 comments on commit acfba6f

Please sign in to comment.