-
Notifications
You must be signed in to change notification settings - Fork 0
/
feature_Selection_Lasso
39 lines (27 loc) · 1.37 KB
/
feature_Selection_Lasso
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import Lasso
# Build the feature selector around a Lasso regression. Lasso's penalty can
# shrink coefficients all the way to zero; alpha is the penalty strength,
# so a larger alpha leaves fewer features selected.
# SelectFromModel fits the wrapped estimator and then keeps the features
# whose fitted coefficients were not shrunk away.
# random_state is fixed so that repeated runs use the same seed.
# NOTE(review): X_train / y_train are expected to be defined before this
# point - they are not created in this file.
sel_ = SelectFromModel(
    estimator=Lasso(alpha=0.005, random_state=0),
).fit(X_train, y_train)
# Boolean mask aligned with the columns of X_train: True marks a feature
# that the selector kept. It is printed explicitly because a bare expression
# is silently discarded when this file is run as a script.
print(sel_.get_support())

# Names of the selected features (X_train must expose `.columns`, i.e. be a
# pandas DataFrame).
selected_feat = X_train.columns[sel_.get_support()]

# Summary: total number of features, how many were selected, and how many
# coefficients of the fitted Lasso are exactly zero.
print('total features: {}'.format(X_train.shape[1]))
print('selected features: {}'.format(len(selected_feat)))
print('features with coefficients shrank to zero: {}'.format(
    np.sum(sel_.estimator_.coef_ == 0)))

# Show the selected feature names.
print(selected_feat)
################## Save selected features ################
# Take the list to persist from the selector's own support mask, so the saved
# file always matches what `sel_.get_support()` / `sel_.transform()` select.
# Re-deriving it from `coef_ != 0` can disagree with the selector (which
# applies its own threshold to the coefficient magnitudes), and flattening
# `coef_` yields a mask of the wrong length when it is 2-D.
selected_feats = X_train.columns[sel_.get_support()]
print(selected_feats)

# Write the selected feature names to disk, one per row, without the index.
pd.Series(selected_feats).to_csv('selected_features.csv', index=False)