forked from OHI-Science/gal
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTR_INE.R
151 lines (105 loc) · 3.98 KB
/
TR_INE.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# 1 TOURISM AND RECREATION
library(tibble)
library(readr)
library(dplyr)
library(ggplot2)
#1.1Load wb_gdppcpp file to gapfill
tr_jobs_pct_tourism<-read_csv( "/Users/batume/Documents/R/GAL_git/region/layers/tr_jobs_pct_tourism.csv")
ttdi_galicia <- read_csv("~/Documents/R/GAL_git/prep/AO/ttdi_galicia.csv")
pib_INE<- tibble(
year = c(2022, 2021, 2020, 2019, 2018, 2017, 2016, 2015),
value = c(4.035, 5.900, -9.055, 1.393, 2.220, 2.914, 3.172, 5.116)
)
pib_INE<- pib_INE %>%
slice(rep(1:n(), each = 3)) %>%
mutate(rgn_id = rep(c(1, 2, 3), times = nrow(.) / 3))
write.csv(pib_INE, "/Users/batume/Documents/R/GAL_git/prep/TR/pib_INE.csv", row.names = FALSE)
pib_INE<-read_csv( "/Users/batume/Documents/R/GAL_git/prep/TR/pib_INE.csv")
######## 2 Gapfilling
# 2.1 `year` to character
pib_INE<- pib_INE %>% mutate(year = as.character(year))
ttdi_galicia <- ttdi_galicia %>% mutate(year = as.character(year))
tr_jobs_pct_tourism <- tr_jobs_pct_tourism %>% mutate(year = as.character(year))
# 2.2 Merge
tr_sust <- pib_INE %>%
full_join(ttdi_galicia, by = c("rgn_id", "year")) %>%
full_join(tr_jobs_pct_tourism, by = c("rgn_id", "year"))
#2.3 gapfill flags
tr_sust <- tr_sust %>%
mutate(
gapfilled = ifelse(is.na(score) & !is.na(Ep), "gapfilled", NA),
method = case_when(
is.na(score) & !is.na(Ep) & is.na(value) ~ "lm georegion + estimated GDP",
is.na(score) & !is.na(Ep) ~ "lm georegion + GDP",
TRUE ~ NA_character_
)
)
#2.4 Simple linear model using GDP (value) to predict S_score
mod_gdp <- lm(score ~ value, data = tr_sust, na.action = na.exclude)
sum(is.na(tr_sust$value))
summary(mod_gdp)
# Filter the rows for the years of interest (2020, 2022, 2023)
years_to_predict <- c("2020", "2022")
tr_sust_gapfilled <- tr_sust %>%
filter(year %in% years_to_predict & is.na(score)) %>%
mutate(S_score_predicted = predict(mod_gdp, newdata = .))
# Merge predictions back to the original dataset
tr_sust <- tr_sust %>%
left_join(tr_sust_gapfilled %>% select(year, rgn_id, S_score_predicted),
by = c("year", "rgn_id"))
# Fill missing S_score with predictions
tr_sust <- tr_sust %>%
mutate(S_score = ifelse(is.na(score), S_score_predicted, score))
# Subset
tr_sust_subset <- tr_sust %>%
select(S_score, Ep, rgn_id, value, year)
tr_sust_filtered <- tr_sust_subset %>%
filter(year %in% c(2018, 2019, 2020, 2021, 2022))
tr_sust_filtered <- tr_sust_filtered %>%
filter(rgn_id != 4)
###CALCUALTE STATUS
# Step 1: Calculate T_r
tr_sust <- tr_sust %>%
mutate(T_r = Ep * S_score)
# Step 2: Calculate T_90th
T_90th <- tr_sust %>%
summarize(T_90th = quantile(T_r, 0.9, na.rm = TRUE)) %>%
pull(T_90th)
# Step 3: Calculate x_tr
tr_sust <- tr_sust %>%
mutate(
x_tr = T_r / T_90th,
x_tr = ifelse(x_tr > 1, 1, x_tr)
)
print(tr_sust)
T_90th_by_rgn_id <- tr_sust %>%
group_by(rgn_id) %>%
summarize(T_90th = quantile(T_r, 0.9, na.rm = TRUE))
# Join T_90th_by_rgn_id to tr_sust to add the regional T_90th values
tr_sust <- tr_sust %>%
left_join(T_90th_by_rgn_id, by = "rgn_id")
# Calculate x_tr per region using the corresponding T_90th
tr_sust <- tr_sust %>%
mutate(
x_tr = T_r / T_90th, # Normalize T_r by the region-specific T_90th
x_tr = ifelse(x_tr > 1, 1, x_tr) # Cap x_tr at 1
)
# View the updated dataset with x_tr
print(tr_sust %>% select(rgn_id, year, T_r, T_90th, x_tr))
#CALCULATE TREND
# Step 4: Filter for the most recent 5 years
recent_years <- tr_sust %>%
filter(as.numeric(year) >= (max(as.numeric(year)) - 4))
# Step 5: Calculate Trend for each region
trend <- recent_years %>%
group_by(rgn_id) %>%
summarize(
slope = coef(lm(x_tr ~ as.numeric(year), na.action = na.exclude))[2], # Slope of x_tr over time
x_start = first(x_tr, order_by = as.numeric(year)), # x_tr value of the earliest year
trend = (slope * 5) / x_start # Proportional change over 5 years
) %>%
mutate(
trend = pmax(pmin(trend, 1.0), -1.0) # trend to [-1.0, 1.0]
)
# View the calculated trends
print(trend)