Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement save and load methods in DIET classifier #240

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ dmypy.json

# Rasa training folder
.rasa/*
tests/.rasa/*
moviebot/*
testing/*

Expand Down
36 changes: 26 additions & 10 deletions dialoguekit/nlu/models/diet_classifier_rasa.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
"""

import copy
import logging
import os
import pickle
import tempfile
from pathlib import Path
from typing import Any, Dict, List, Optional, Text, Type
Expand Down Expand Up @@ -73,7 +75,11 @@ def __init__(
else:
raise TypeError("Provided 'training_data_path' is not a string!")

self.init_pipeline()
try:
self.load_model()
self._processes_utterances: Dict[str, Any] = {}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is this? Are these processed utterances? If yes, can we change the name?

except Exception:
self.init_pipeline()

def init_pipeline(self) -> None:
"""Creates classifier and initialize.
Expand Down Expand Up @@ -113,7 +119,7 @@ def init_pipeline(self) -> None:
),
resource=self._def_resource,
)
self._processes_utterances: Dict[str, Any] = {}
self._processes_utterances = {}

def train_model(
self,
Expand Down Expand Up @@ -278,18 +284,18 @@ def process_message(

return message

def save_model(self, file_path: str) -> None:
"""Saves the trained model to a file.
def save_model(self, file_path: str = None) -> None:
"""Saves the trained model and preprocess pipeline.

Args:
file_path: File path.

Raises:
NotImplementedError: If not implemented in derived class.
"""
raise NotImplementedError("Rasa Diet")
self._diet.persist()
pipeline_path = os.path.join(self._model_path, "pipeline.pkl")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we could move "pipeline.pkl" to a global variable

pickle.dump(self._component_pipeline, open(pipeline_path, "wb"))
logging.info(f"Model saved to {self._model_path}.")

def load_model(self, file_path: str) -> None:
def load_model(self, file_path: str = None) -> None:
"""Loads a model from a file.

Args:
Expand All @@ -298,4 +304,14 @@ def load_model(self, file_path: str) -> None:
Raises:
NotImplementedError: If not implemented in derived class.
"""
raise NotImplementedError
self._diet = DIETClassifier.load(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we rename it to diet_classifier?

{**DIETClassifier.get_default_config()},
model_storage=self._def_model_storage,
execution_context=ExecutionContext(
GraphSchema({}), node_name="diet_1"
),
resource=self._def_resource,
)
pipeline_path = os.path.join(self._model_path, "pipeline.pkl")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use a global variable here

self._component_pipeline = pickle.load(open(pipeline_path, "rb"))
logging.info(f"Model loaded from {self._model_path}.")
12 changes: 7 additions & 5 deletions dialoguekit/utils/dialogue_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,21 +153,23 @@ def reward(self) -> Dict[str, List[Dict[str, float]]]:
# Start dialogue with Agent first.
for j, utterance in enumerate(dialogue.utterances):
if utterance.participant == DialogueParticipant.AGENT.name:
dialogue_utterances_start_agent = dialogue.utterances[j:]
dialogue_utterances_start_agent: List[
AnnotatedUtterance
] = dialogue.utterances[j:]
break
previous_sender = dialogue_utterances_start_agent[0].participant
previous_intent = dialogue_utterances_start_agent[0].intent # type: ignore[attr-defined] # noqa
previous_intent = dialogue_utterances_start_agent[0].intent
for j, annotated_utterance in enumerate(
dialogue_utterances_start_agent, start=1
):
if (
annotated_utterance.participant == previous_sender
and previous_intent == annotated_utterance.intent # type: ignore[attr-defined] # noqa
and previous_intent == annotated_utterance.intent
):
n_repeat_intents += 1
previous_intent = None
continue
previous_intent = annotated_utterance.intent # type: ignore[attr-defined] # noqa
previous_intent = annotated_utterance.intent
previous_sender = annotated_utterance.participant

results["dialogues"][i]["repeats"] = n_repeat_intents
Expand Down Expand Up @@ -201,7 +203,7 @@ def _check_included_intents(self) -> Dict[str, Any]:
],
}

dialogue_intents = []
dialogue_intents: List[Intent] = []
reward = self._reward_config["full_set_points"]
for dialogue in self._dialogues:
for utterance in dialogue.utterances:
Expand Down
65 changes: 55 additions & 10 deletions tests/nlu/test_diet_classifier_rasa.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
"""Tests for IntentClassifierRasa."""

import os
from typing import List
from unittest import mock

import pytest
from rasa.nlu.classifiers.diet_classifier import DIETClassifier

from dialoguekit.core import Intent, Utterance
from dialoguekit.nlu import IntentClassifierRasa
Expand All @@ -10,13 +15,13 @@


@pytest.fixture
def intents():
def intents() -> List[Intent]:
"""List of intents fixture."""
return [Intent(f"intent {i}") for i in range(1, 7)]


@pytest.fixture
def utterances_1():
def utterances_1() -> List[Utterance]:
"""List of utterances fixture."""
return [
Utterance(text, participant=DialogueParticipant.AGENT)
Expand All @@ -31,13 +36,13 @@ def utterances_1():


@pytest.fixture
def labels_1():
def labels_1() -> List[Intent]:
"""List of intent labels fixture."""
return [Intent(f"intent {i}") for i in range(1, 6)]


@pytest.fixture
def utterances_2():
def utterances_2() -> List[Utterance]:
"""List of utterances fixture."""
return [
Utterance(text, participant=DialogueParticipant.AGENT)
Expand All @@ -49,20 +54,24 @@ def utterances_2():


@pytest.fixture
def labels_2():
def labels_2() -> List[Intent]:
"""List of intent labels fixture."""
return [Intent(f"intent {i}") for i in [1, 3]]


def test_classify_intent_exact_patterns(intents, utterances_1, labels_1):
def test_classify_intent_exact_patterns(
intents: List[Intent],
utterances_1: List[Utterance],
labels_1: List[Intent],
) -> None:
"""Tests label prediction.

Args:
intents: Test intents
utterances_1: Test utterances to train on
labels_1: Test utterance intent labels.
"""
intent_classifier = IntentClassifierRasa(intents)
intent_classifier = IntentClassifierRasa(intents, model_path="tests/.rasa")
intent_classifier.train_model(utterances_1, labels_1)
for utterance_template, intent in zip(utterances_1, labels_1):
utterance_text = utterance_template.text.replace(
Expand All @@ -73,11 +82,16 @@ def test_classify_intent_exact_patterns(intents, utterances_1, labels_1):
)
predicted_intent = intent_classifier.classify_intent(utterance)
assert predicted_intent.label == intent.label
os.system("rm -rf tests/.rasa")


def test_classify_intent_similar_patterns(
intents, utterances_1, labels_1, utterances_2, labels_2
):
intents: List[Intent],
utterances_1: List[Utterance],
labels_1: List[Intent],
utterances_2: List[Utterance],
labels_2: List[Intent],
) -> None:
"""Tests label prediction.

Args:
Expand All @@ -87,7 +101,7 @@ def test_classify_intent_similar_patterns(
utterances_2: Secondary test utterances.
labels_2: Secondary test labels.
"""
intent_classifier = IntentClassifierRasa(intents)
intent_classifier = IntentClassifierRasa(intents, model_path="tests/.rasa")
intent_classifier.train_model(utterances_1, labels_1)
for utterance_template, intent in zip(utterances_2, labels_2):
utterance_text = utterance_template.text.replace(
Expand All @@ -98,3 +112,34 @@ def test_classify_intent_similar_patterns(
)
predicted_intent = intent_classifier.classify_intent(utterance)
assert predicted_intent.label == intent.label
os.system("rm -rf tests/.rasa")


def test_load_save_model(
    intents: List[Intent], utterances_1: List[Utterance], labels_1: List[Intent]
) -> None:
    """Tests saving a trained model and loading it into a new classifier.

    Args:
        intents: Test intents.
        utterances_1: Test utterances to train on.
        labels_1: Test utterance intent labels.
    """
    # Local import keeps this test self-contained; shutil replaces the
    # shell-specific `rm -rf` so cleanup also works without a POSIX shell.
    import shutil

    model_path = "tests/.rasa"
    try:
        intent_classifier = IntentClassifierRasa(intents, model_path=model_path)
        intent_classifier.train_model(utterances_1, labels_1)
        intent_classifier.save_model()

        assert os.path.exists(os.path.join(model_path, "pipeline.pkl"))

        # A second instance created on the same model path; the assertions
        # below check the state it ends up with after construction.
        intent_classifier_2 = IntentClassifierRasa(
            intents, model_path=model_path
        )

        assert len(intent_classifier_2._component_pipeline) == len(
            intent_classifier._component_pipeline
        )
        assert isinstance(intent_classifier_2._diet, DIETClassifier)
        assert intent_classifier_2._processes_utterances == {}
    finally:
        # Always remove the model folder, even when an assertion fails, so
        # that a leftover model cannot influence other tests.
        shutil.rmtree(model_path, ignore_errors=True)


@mock.patch.object(IntentClassifierRasa, "init_pipeline")
def test_load_empty_model(
    mock_init_pipeline: mock.MagicMock, intents: List[Intent]
) -> None:
    """Tests if pipeline is initialized when loading empty model.

    Args:
        mock_init_pipeline: Mock replacing IntentClassifierRasa.init_pipeline.
        intents: Test intents.
    """
    # Local import keeps this test self-contained; shutil replaces the
    # shell-specific `rm -rf` so cleanup also works without a POSIX shell.
    import shutil

    model_path = "tests/.rasa"
    try:
        # NOTE(review): presumably nothing has been saved under model_path at
        # this point; this relies on other tests cleaning up after themselves.
        _ = IntentClassifierRasa(intents, model_path=model_path)
        mock_init_pipeline.assert_called_once()
    finally:
        # Always remove the model folder, even when the assertion fails.
        shutil.rmtree(model_path, ignore_errors=True)