-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdiagram of how SMOTE works.R
41 lines (30 loc) · 1.15 KB
/
diagram of how SMOTE works.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
library(performanceEstimation)
library(tidyverse)
# Build a two-feature, two-class toy data set from iris.
# setosa is relabelled as the minority class ("Minor"); every other species is
# pooled into "Major". Only the two sepal measurements are kept as features.
# No provenance column is carried along: any extra column would be picked up
# as a predictor by a `Class ~ .` formula.
df <- iris %>%
  rename(Feature1 = Sepal.Length, Feature2 = Sepal.Width) %>%
  mutate(Class = factor(ifelse(Species == "setosa", "Minor", "Major"))) %>%
  select(Feature1, Feature2, Class)

# Scatter plot of the original data, coloured by class.
ggplot(df) +
  geom_point(aes(x = Feature1, y = Feature2, colour = Class)) +
  theme_bw()
# SMOTE relies on random sampling, so fix the seed to make the resampled data
# (and therefore every plot below) reproducible from run to run.
set.seed(42)

# Resample with SMOTE, using every other column of df as a predictor of Class.
# perc.over drives how many synthetic minority cases are generated and
# perc.under how many majority cases are retained; see
# ?performanceEstimation::smote for the exact definitions.
df_smote <- smote(Class ~ ., df, perc.over = 1, perc.under = 0.1)

# Class counts before and after resampling.
table(df$Class)
table(df_smote$Class)

# Original data only, drawn semi-transparent so overlapping points show up.
ggplot(df) +
  geom_point(aes(x = Feature1, y = Feature2, colour = Class), alpha = 0.2) +
  theme_bw()

# Original data overlaid with the SMOTE output on the same axes.
ggplot(bind_rows(df, df_smote)) +
  geom_point(aes(x = Feature1, y = Feature2, colour = Class), alpha = 0.2) +
  theme_bw()
# Stack the original data and the SMOTE output, tagging each row with its
# source, then relabel the minority class by source so the two minority
# distributions can be compared directly.
#
# Both inputs are tagged explicitly here. Previously only df_smote was tagged,
# so the original rows had data = NA and the is.na() test labelled them
# "Minor - SMOTEd" (and the SMOTE rows "Minor - Original"), i.e. the legend
# was inverted.
df_2 <- bind_rows(
  df %>% mutate(data = "Original"),
  df_smote %>% mutate(data = "SMOTEd")
) %>%
  mutate(
    Class = case_when(
      Class == "Major" ~ "Major",
      data == "SMOTEd" ~ "Minor - SMOTEd",
      TRUE ~ "Minor - Original"
    )
  ) %>%
  # Long format: one row per (observation, feature) so features can be faceted.
  pivot_longer(
    cols = Feature1:Feature2,
    names_to = "Feature",
    values_to = "Value"
  )

# Per-feature density of each class; x scales are freed because the two
# features are faceted onto separate panels with their own value ranges.
ggplot(df_2) +
  geom_density(aes(x = Value, fill = Class), alpha = 0.2) +
  theme_bw() +
  facet_wrap(vars(Feature), scales = "free_x")