-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathALCC_AIC.R
140 lines (110 loc) · 4.29 KB
/
ALCC_AIC.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
library(tidyverse)
library(AICcmodavg)
# ALCC demo
df <- data.frame(
stv = c(179, 52, 43, 17, 16, 22, 83, 38, 28, 28, 50, 94, 54, 196, 74, 83, 43,
40, 90, 98, 76, 65, 56, 76, 108, 27, 26, 60, 174, 177, 92, 146, 71, 81,
80, 66, 35, 30, 51, 50, 51),
ry = c(92.39102, 97.34460, 98.83400, 21.51055, 31.69724, 42.37560, 96.12094,
69.81919, 58.44371, 66.13142, 92.36598, 85.84545, 97.95544, 97.67861,
98.66029, 95.53441, 61.64348, 82.68268, 92.35338, 95.93326, 96.47150,
85.64553, 100.00000, 87.57085, 100.00000, 56.42384, 60.79521, 97.39357,
100.00000, 98.98271, 84.51251, 96.53179, 99.39140, 99.67509, 97.22519,
88.28658, 80.46175, 74.29626, 91.46719, 96.81733, 99.84994))
df_2 <- data.frame(stv = c(1, 1, 11, 11, 21, 21, 31, 31),
ry = c(95, 100, 95, 100, 85, 90, 85, 90))
qplot(data = df, stv, ry, geom = "point")
# limit to 100 for ALCC
df$ry <- ifelse(df$ry > 100, 100, round(df$ry, 1))
qplot(data = df, stv, ry, geom = "point")
# get sample size
n <- nrow(df)
# Step 1 Transform (t for "transformed" added to x and y)
# for ALCC, soil test goes to Y-axis and RY goes to X-axis
xt <- asin(sqrt(df$ry / 100))
yt <- log(df$stv)
# Step 2 Center
sufficiency <- 95
adjust_by <- asin(sqrt(sufficiency / 100))
xt_centered <- xt - adjust_by
# Step 3 Correlation
pearson <- cor(xt_centered, yt, method = "pearson")
t_stat <- (pearson * sqrt(n - 2)) / sqrt(1 - pearson ^ 2)
pvalue <- pt(t_stat, df = n - 1, lower.tail = FALSE)
# Step 4 Means
mean_xt <- mean(xt_centered)
mean_yt <- mean(yt)
# Step 5 Fit linear model to transformed, centered data
fit_lm <- lm(yt ~ xt_centered)
AICc(fit_lm) # can we use AIC here?
intercept <- coef(fit_lm)[[1]]
slope <- coef(fit_lm)[[2]]
# Step 6 Rotate the regression (SMA)
# slope must come first
slope <- slope / pearson
intercept <- mean_yt - slope * mean_xt
# Step 7 Estimate Critical Soil Test Concentration
cstv <- exp(intercept)
cstv
# Step 8 Estimate the confidence interval
pred_yt <- intercept + slope * xt_centered
# 8.1 Create residuals
## residuals based on transformed soil test values
alcc_res <- yt - pred_yt
## or do we use residuals based on back-transformed soil test values?
alcc_res2 <- exp(yt) - exp(pred_yt)
# 8.2 Estimate CI
mse <- sum((yt - pred_yt) ^ 2) / (n - 2)
ssx <- var(xt_centered) * (n - 1)
confidence <- 95
se <- sqrt(mse * ((1 / n) + ((mean_xt ^ 2) / ssx)))
lower_cl <-
exp(intercept - se * qt(1 - (1 - confidence / 100) / 2,
df = n - 2))
upper_cl <- exp(intercept + se * qt(1 - (1 - confidence / 100) / 2,
df = n - 2))
# Step 9 Back-transform
# New RY values to create smoother curve to 0
tmp <- seq(0, 100, by = 0.05)
tmp <- df$stv
tmp2 <- asin(sqrt(tmp/100)) - asin(sqrt(sufficiency/100))
new_pred_y <- intercept + slope * tmp2
fitted_stv <- exp(new_pred_y)
fitted_ry <- 100 * (sin(adjust_by + ((new_pred_y - intercept) / slope))) ^ 2
fitted_stv <- round(fitted_stv, 1)
ry_res <-
tibble(stv = fitted_stv,
fitted_ry) %>%
left_join(df) %>%
drop_na() %>%
distinct(across(everything())) %>%
mutate(resid_ry = ry - fitted_ry)
ggplot() +
geom_point(aes(df$stv, df$ry)) +
geom_line(data = ry_res,
aes(stv, fitted_ry)) +
geom_point(data = ry_res,
aes(stv, fitted_ry),
color = "red") +
geom_hline(yintercept = sufficiency, lty = 2) +
geom_vline(xintercept = c(lower_cl, upper_cl), lty = 3) +
geom_vline(xintercept = cstv, color = "#CC0000") +
geom_point(aes(cstv, sufficiency), color = "#CC0000", size = 3) +
scale_x_continuous(breaks = seq(0, 300, 4))
ggplot() +
geom_point(aes(df$stv, df$ry)) +
geom_line(aes(fitted_stv, fitted_ry)) +
geom_point(aes(fitted_stv, fitted_ry), color = "red") +
geom_hline(yintercept = sufficiency, lty = 2) +
geom_vline(xintercept = c(lower_cl, upper_cl), lty = 3) +
geom_vline(xintercept = cstv, color = "#CC0000") +
geom_point(aes(cstv, sufficiency), color = "#CC0000", size = 3)
# logLik
res <- alcc_res ## Replace this line with extraction of residuals from ALCC
N <- length(res)
w <- rep_len(1, N)
zw <- w == 0
N <- sum(!zw)
val <- -N * (log(2 * pi) + 1 - log(N) - sum(log(w + zw))/N + log(sum(res^2)))/2
val # value is negative
# AIC