svm.py
## SVM Implementation Using Scikit-Learn
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
import time
# Load dataset
data = pd.read_csv('song_data.csv')
def scale(x):
    # Column-wise: take the absolute z-score, then min-max rescale each column to [0, 10]
    mean = np.mean(x, axis=0)
    std = np.std(x, axis=0)
    scaled = abs(x - mean) / std
    col_max = np.max(scaled, axis=0)
    col_min = np.min(scaled, axis=0)
    scaled = (scaled - col_min) / (col_max - col_min) * 10
    return scaled
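# Illustrative sanity check (a minimal sketch, not part of the original pipeline):
# on a tiny frame, each column of scale()'s output should span exactly [0, 10].
_demo = scale(pd.DataFrame({'a': [1.0, 2.0, 3.0], 'b': [10.0, 20.0, 40.0]}))
print("scale() sanity check:\n", _demo)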
x = data.drop(columns=['genre'])
x = x.apply(pd.to_numeric, errors='coerce')
x = x.fillna(x.mean())
x = scale(x)
#print(x)
y = data['genre'].values
# Standardize features
scaler = StandardScaler()
x_scaled = scaler.fit_transform(x)
# Split into train and test sets
x_train, x_test, y_train, y_test = train_test_split(x_scaled, y, test_size=0.2, random_state=50)
def train_svm(X_train, y_train, X_test, y_test, kernel='linear'):
    print(f"\nTraining SVM with {kernel} kernel...")

    # Define parameter grid for hyperparameter tuning
    param_grid = {
        'C': [0.1, 1, 10, 20],
        'gamma': ['scale', 'auto'] if kernel in ['rbf', 'poly', 'sigmoid'] else None,
        'degree': [2, 3, 4] if kernel == 'poly' else None,  # degree is only used by the poly kernel
        'kernel': [kernel]
    }
    # Remove None values from param_grid
    param_grid = {k: v for k, v in param_grid.items() if v is not None}

    # Initialize SVM model
    svm = SVC()

    # Perform grid search with 5-fold cross-validation
    start_time = time.time()
    grid_search = GridSearchCV(svm, param_grid, cv=5, scoring='accuracy')
    grid_search.fit(X_train, y_train)
    end_time = time.time()
    print(f"Training time: {end_time - start_time:.2f}s")

    # Best parameters
    print("Best parameters:", grid_search.best_params_)

    # Alternative: randomized search over the same grid (gave poorer performance)
    # random_search = RandomizedSearchCV(estimator=svm, param_distributions=param_grid, n_iter=50, cv=5)
    # random_search.fit(X_train, y_train)
    # print("Best parameters:", random_search.best_params_)

    # Alternative: fixed hyperparameters without grid search
    # best_model = SVC(kernel=kernel)  # C=1.0, degree=3, gamma='scale'
    # best_model.fit(X_train, y_train)

    # Use the best model found by grid search
    best_model = grid_search.best_estimator_
    # best_model = random_search.best_estimator_  # random search model (poor performance)
    y_pred = best_model.predict(X_test)

    # Evaluate the model on the test set
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))
    print("Accuracy:", accuracy_score(y_test, y_pred))

    return best_model
# Train linear SVM
linear_svm = train_svm(x_train, y_train, x_test, y_test, kernel='linear')
# Train SVM with RBF kernel
rbf_svm = train_svm(x_train, y_train, x_test, y_test, kernel='rbf')
# Train SVM with polynomial kernel
poly_svm = train_svm(x_train, y_train, x_test, y_test, kernel='poly')
# Train SVM with sigmoid kernel
sigmoid_svm = train_svm(x_train, y_train, x_test, y_test, kernel='sigmoid')
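# Optional follow-up (a minimal sketch, not part of the original script): reuse the
# fitted models and the held-out split defined above to print a side-by-side
# comparison of test accuracy per kernel.
print("\nKernel comparison on the held-out test set:")
for name, model in [('linear', linear_svm), ('rbf', rbf_svm),
                    ('poly', poly_svm), ('sigmoid', sigmoid_svm)]:
    acc = accuracy_score(y_test, model.predict(x_test))
    print(f"{name:>8}: {acc:.4f}")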