forked from iandurbach/ml-for-ecology
-
Notifications
You must be signed in to change notification settings - Fork 0
/
regtrees-intro.R
66 lines (54 loc) · 2.12 KB
/
regtrees-intro.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#### Regression trees I: Intro
# - building the tree
# - plotting the tree
# - assessing accuracy
# - choosing tuning parameters
library(tree)
# load the aloe data; load() returns the names of the objects it restored
loaded_objects <- load("data/aloe.RData")
# fail early, with a clear message, if the file does not provide what the
# rest of the script relies on (an `aloe` object with the modelled columns)
if (!("aloe" %in% loaded_objects)) {
  stop("data/aloe.RData does not contain an object named 'aloe'",
       call. = FALSE)
}
missing_cols <- setdiff(c("tottrees", "Latitude", "Longitude"), names(aloe))
if (length(missing_cols) > 0) {
  stop("aloe is missing required column(s): ",
       paste(missing_cols, collapse = ", "), call. = FALSE)
}
# quick look at the first few rows
head(aloe)
# fit a regression tree for log(tottrees) on the two coordinates, using every
# row of aloe for now (the train/test split comes later)
tree_aloe <- tree(
  formula = log(tottrees) ~ Latitude + Longitude,
  data = aloe,
  split = "deviance"
)
# plot the tree and add its text labels
plot(tree_aloe)
text(tree_aloe, cex = 0.9)
# visualize the partitioned feature space
# Axis style "i" drops the default padding around the data range. par()
# returns the previous settings, so keep them and restore them afterwards;
# otherwise the "i" style leaks into every later plot in the session.
old_par <- par(xaxs = "i", yaxs = "i")
# one colour per row of aloe, used for both the point border and its fill
# NOTE(review): the index 1 + floor(tottrees) runs past the 11-colour palette
# whenever tottrees >= 11, giving NA colours -- confirm the range of tottrees
point_cols <- terrain.colors(11)[1 + floor(aloe$tottrees)]
plot(aloe$Longitude, aloe$Latitude, pch = 21, cex = 0.6,
     col = point_cols, bg = point_cols,
     xlab = "Longitude", ylab = "Latitude",
     main = "Predicted Log Abundance", bty = "o")
# overlay the tree's partition of the (Longitude, Latitude) plane
partition.tree(tree_aloe, ordvars = c("Longitude", "Latitude"),
               add = TRUE, lwd = 3)
# put the graphics settings back the way they were
par(old_par)
## assess accuracy (MSE/deviance)
# predictions from the fitted tree (no new data supplied)
pred_aloe <- predict(object = tree_aloe)
# scatterplot check: observed response against predicted response
plot(x = tree_aloe$y, y = pred_aloe, xlab = "Observed", ylab = "Predicted")
# mean square error
mean((pred_aloe - tree_aloe$y)^2)
# tree.control gives finer control over how the tree is grown; experiment
# with its options and see help(tree.control) for details
# Setting mindev = 0 and minsize = 2 produces a tree that fits the data
# perfectly, if the limit on tree depth allows such a tree.
tree_aloe <- tree(
  formula = log(tottrees) ~ Latitude + Longitude,
  data = aloe,
  split = "deviance",
  mindev = 0,
  minsize = 2,
  mincut = 1
)
# plot the tree and add its text labels
plot(tree_aloe)
text(tree_aloe, cex = 0.9)
# visualize the partitioned feature space
# Axis style "i" drops the default padding around the data range. par()
# returns the previous settings, so keep them and restore them afterwards;
# otherwise the "i" style leaks into every later plot in the session.
old_par <- par(xaxs = "i", yaxs = "i")
# one colour per row of aloe, used for both the point border and its fill
# NOTE(review): the index 1 + floor(tottrees) runs past the 11-colour palette
# whenever tottrees >= 11, giving NA colours -- confirm the range of tottrees
point_cols <- terrain.colors(11)[1 + floor(aloe$tottrees)]
plot(aloe$Longitude, aloe$Latitude, pch = 21, cex = 0.6,
     col = point_cols, bg = point_cols,
     xlab = "Longitude", ylab = "Latitude",
     main = "Predicted Log Abundance", bty = "o")
# overlay the tree's partition of the (Longitude, Latitude) plane
partition.tree(tree_aloe, ordvars = c("Longitude", "Latitude"),
               add = TRUE, lwd = 3)
# put the graphics settings back the way they were
par(old_par)
## assess accuracy (MSE/deviance)
# predictions from the re-fitted tree (no new data supplied)
pred_aloe <- predict(object = tree_aloe)
# scatterplot check: observed response against predicted response
plot(x = tree_aloe$y, y = pred_aloe, xlab = "Observed", ylab = "Predicted")
# mean square error
mean((pred_aloe - tree_aloe$y)^2)
#? model looks great, but what's the problem?