-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodels.py
63 lines (48 loc) · 1.37 KB
/
models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
import joblib
def train_knn(X, y, n_neighbors=20):
    """
    Fit a k-Nearest Neighbors (kNN) classifier on the given training data.

    Args:
        X: Training feature matrix.
        y: Training labels.
        n_neighbors: Number of neighbors handed to KNeighborsClassifier.

    Returns:
        The fitted KNeighborsClassifier instance.
    """
    # fit() hands back the estimator itself, so building and training chain.
    return KNeighborsClassifier(n_neighbors=n_neighbors).fit(X, y)
def train_naive_bayes(X, y, alpha=0.6):
    """
    Fit a multinomial Naive Bayes classifier on the given training data.

    The classifier is created with ``fit_prior=False``, so class priors are
    not learned from the label frequencies in ``y``.

    Args:
        X: Training feature matrix.
        y: Training labels.
        alpha: Smoothing parameter forwarded to MultinomialNB.

    Returns:
        The fitted MultinomialNB instance.
    """
    settings = {"alpha": alpha, "fit_prior": False}
    # fit() hands back the estimator itself, so the fitted model is returned.
    return MultinomialNB(**settings).fit(X, y)
def save_model(model, vectorizer, filepath):
    """
    Bundle the vectorizer and model into one pipeline and write it to disk.

    The pipeline has two named steps: 'tfidf' (the vectorizer) followed by
    'classifier' (the model). It is serialized with joblib.

    Args:
        model: Trained model to store as the 'classifier' step.
        vectorizer: Fitted TfidfVectorizer to store as the 'tfidf' step.
        filepath: Destination path for the serialized pipeline.
    """
    steps = [
        ('tfidf', vectorizer),
        ('classifier', model),
    ]
    joblib.dump(Pipeline(steps), filepath)
def load_model(filepath):
    """
    Read back an object previously serialized with joblib.

    For files written by save_model, the result is the saved pipeline
    (vectorizer plus classifier) rather than a bare model.

    NOTE: joblib's loader is pickle-based, so only load files from a
    trusted source.

    Args:
        filepath: Path to the saved model file.

    Returns:
        The object deserialized from the file.
    """
    restored = joblib.load(filepath)
    return restored