-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAKS_ICP_HW2.Rmd
221 lines (184 loc) · 9.5 KB
/
AKS_ICP_HW2.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
---
title: "International Climate Policy - Homework 2\nWorking document (R Markdown)"
author: "Amartya Kumar Sinha"
date: "2024-02-01"
output:
html_document: default
word_document: default
pdf_document: default
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
```{r}
setwd("C:/Users/Amartya Kumar Sinha/OneDrive - The University of Chicago/IntlClimatePolicy_PPHA39930/Assignments/Assn2_IntlClimatePolicy")
library(dplyr)
library(ggplot2)
library (lfe)
```
```{r}
rep_nomiss <- read.csv("icp_indiv_2_dg2011_rep_nomiss.csv")
ave_temp <- read.csv("icp_indiv_2_county_avetemp.csv")
```
# Question 1
### Taking population weighted average across all temperature variables for the entire country
```{r}
# Creating a new dataframe where each row is a temperature range and its population-weighted average
temp_vars <- grep("tday", names(rep_nomiss), value = TRUE)
pop_weighted_avg <- sapply(temp_vars, function(x) weighted.mean(rep_nomiss[[x]], rep_nomiss$population, na.rm = TRUE))
```
# 1. a)
### Plotting histogram as required
```{r}
labels <- c("<10", "10–20", "20–30", "30–40", "40–50", "50–60", "60–70", "70–80", "80–90", ">90")
barplot(pop_weighted_avg,
main = "Population-Weighted Average Number of Days in \nEach Temperature Range (1968-2002 average)",
ylab = "",
xlab = "Annual distribution of daily mean temperatures (F)",
ylim = c(0, 80),
names.arg = labels,
las = 2, col = "lightyellow")
abline(h = seq(0, max(pop_weighted_avg), by = 10), col = "gray", lty = "dotted")
```
# 1. b) and c)
### Answering questions as required.
```{r}
# Calculating population-weighted average number of days above 90◦ F per year across the US
pop_weighted_avg_above_90 <- weighted.mean(rep_nomiss$tday_gt90, rep_nomiss$population, na.rm = TRUE)
# Finding county with the highest number of days above 90◦ F per year
highest_days_above_90 <- rep_nomiss[which.max(rep_nomiss$tday_gt90), "county"]
highest_days_above_90_count <- max(rep_nomiss$tday_gt90, na.rm = TRUE)
# Calculating the average number of days above 90◦F per year for each county over the sample period (1968-2002)
avg_days_above_90 <- aggregate(tday_gt90 ~ county, rep_nomiss, mean, na.rm = TRUE)
# Finding number of counties that have, on average over the sample period (1968-2002), experienced zero days above 90◦F per year
num_counties_zero_days_above_90 <- sum(avg_days_above_90$tday_gt90 == 0)
# Calculating the total number of counties
total_counties <- length(unique(rep_nomiss$county))
# Calculating the percentage of counties that have, on average over the sample period (1968-2002), experienced zero days above 90◦F per year
percentage_zero_days_above_90 <- (num_counties_zero_days_above_90 / total_counties) * 100
print(paste("The population-weighted average number of days above 90◦ F per year across the US is", round(pop_weighted_avg_above_90, 2)))
print(paste("The county with the highest number of days above 90◦ F per year is", highest_days_above_90, "with", round(highest_days_above_90_count, 2), "days"))
print(paste("The number of counties that have, on average over the sample period (1968-2002), experienced zero days above 90◦F per year is", num_counties_zero_days_above_90, "out of a total of", total_counties, "counties in the dataset, or about", round(percentage_zero_days_above_90, 2), "%"))
```
# Question 2
# 2. a)
```{r}
print(paste("The national average over-65 mortality rate is", round(mean(rep_nomiss$cruderate, na.rm = TRUE), 2), "deaths per 100,000 population"))
print(paste("The total number of deaths from 1968-2002 is", sum(rep_nomiss$deaths, na.rm = TRUE)))
```
# Question 3
Merging given datasets and creating variables as instructed
```{r}
# Merging dataframes on 'countycode'
merged_df <- merge(rep_nomiss, ave_temp, by = "countycode")
# Calculating sum of all days over 70◦ F and 80◦ F for each county
merged_df$hotdays <- rowSums(merged_df[,grep("tday_70_80|tday_80_90|tday_gt90", names(merged_df))], na.rm = TRUE)
merged_df$hotterdays <- rowSums(merged_df[,grep("tday_80_90|tday_gt90", names(merged_df))], na.rm = TRUE)
# Calculating average mortality rate, hotdays, and hotterdays for each county
avg_by_county <- aggregate(cbind(cruderate, hotdays, hotterdays) ~ countycode, merged_df, mean, na.rm = TRUE)
# Checking first few rows of the new dataframe
print(head(avg_by_county))
```
# 3. a)
### Plotting figure as required
```{r}
avg_by_county <- merge(avg_by_county, ave_temp, by = "countycode")
model <- lm(cruderate ~ normal_1981_2010, data = avg_by_county)
slope <- coef(model)[2]
ggplot(avg_by_county, aes(x = normal_1981_2010, y = cruderate)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE, color = "red") +
ggtitle("Relationship between average county temperatures and over-65 mortality rates") +
xlab("Average county temperatures (1981-2010)") +
ylab("Over 65 Mortality Rates") +
annotate("text", x = min(avg_by_county$normal_1981_2010), y = max(avg_by_county$cruderate), label = paste("Slope =", round(slope, 2)), hjust = 0, vjust = 1)
```
# 3. b)
### Plotting graphs as instructed: hot days first, and then hotter days
```{r}
model_hotdays <- lm(cruderate ~ hotdays, data = avg_by_county)
slope_hotdays <- coef(model_hotdays)[2]
rsq <- summary(model_hotdays)$r.squared
ggplot(avg_by_county, aes(x = hotdays, y = cruderate)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE, color = "red") +
ggtitle("Relationship between hot days and over-65 mortality rates") +
xlab("Hot Days") +
ylab("Over-65 Mortality Rates") +
annotate("text", x = min(avg_by_county$hotdays), y = max(avg_by_county$cruderate), label = paste("Slope =", round(slope_hotdays, 2)), hjust = 0, vjust = 3) +
annotate("text", x = min(avg_by_county$hotdays), y = max(avg_by_county$cruderate) - 10, label = paste("R-squared =", round(rsq, 2)), hjust = 0, vjust = 1)
```
```{r}
model_hotterdays <- lm(cruderate ~ hotterdays, data = avg_by_county)
slope_hotterdays <- coef(model_hotterdays)[2]
rsq <- summary(model_hotterdays)$r.squared
ggplot(avg_by_county, aes(x = hotterdays, y = cruderate)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE, color = "red") +
ggtitle("Relationship between hotter days and over-65 mortality rates") +
xlab("Hotter Days") +
ylab("Over-65 Mortality Rates") +
annotate("text", x = min(avg_by_county$hotterdays), y = max(avg_by_county$cruderate), label = paste("Slope =", round(slope_hotterdays, 2)), hjust = 0, vjust = 3) +
annotate("text", x = min(avg_by_county$hotterdays), y = max(avg_by_county$cruderate) - 10, label = paste("R-squared =", round(rsq, 2)), hjust = 0, vjust = 1)
```
# Question 4
### Data reload not needed since the dataframe "merged_df" exists consisting of the original two datasets merged together and the columns for hot days and hotter days are appended to it
```{r}
selected_counties <- c("Mobile County, AL", "Cook County, IL", "Los Angeles County, CA", "Miami-Dade County, FL")
# Creating new dataframe with selected counties
subset_q4 <- merged_df[merged_df$county.x %in% selected_counties, ]
# Inspecting subsetted dataframe
print(head(subset_q4))
```
# 4. a)
```{r}
model_hotterdays <- lm(cruderate ~ hotterdays, data = subset_q4)
slope_hotterdays <- coef(model_hotterdays)[2]
ggplot(subset_q4, aes(x = hotterdays, y = cruderate)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE, color = "red") +
ggtitle("Relationship between hotter days and over 65 mortality rates") +
xlab("Hotter Days") +
ylab("Over 65 Mortality Rates") +
annotate("text", x = min(subset_q4$hotterdays), y = max(subset_q4$cruderate), label = paste("Slope =", round(slope_hotterdays, 2)), hjust = 0, vjust = 1)
```
# 4. b)
```{r}
ggplot(subset_q4, aes(x = hotterdays, y = cruderate, color = county.x)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
ggtitle("County-based relationship between hotter days and over 65 mortality") +
xlab("Hotter Days") +
ylab("Over-65 Mortality Rates") +
theme(legend.position = "bottom")
```
# Question 5
```{r}
dg2011 <- felm (cruderate
~ tday_lt10 + tday_10_20 + tday_20_30 + tday_30_40 + tday_40_50
+ tday_50_60 + tday_70_80 + tday_80_90 + tday_gt90
+ prec_10_15 + prec_15_20 + prec_20_25 + prec_25_30 + prec_30_35
+ prec_35_40 + prec_40_45 + prec_45_50 + prec_50_55 + prec_55_60
+ prec_gt60
| countycode + ssyy | 0 | countycode,
data = merged_df, weight = merged_df$population)
summary(dg2011)
```
# 5. a)
```{r}
# Creating a dataframe with order of temperature bins, with the value of zero for the temperature bin of 60-70F
tempbins <- data.frame(
temperature_bins = factor(c("<10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", ">90"),
levels = c("<10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", ">90")),
estimates = c(3.7239, 2.6755, 3.5811, 1.8146, 1.4530, 0.2559, 0, 0.8364, 1.3660, 5.3466),
std_errors = c(1.3839, 1.1061, 0.8509, 0.6684, 0.5699, 0.3271, 0, 0.6805, 0.8861, 1.3828)
)
ggplot(tempbins, aes(x = temperature_bins, y = estimates)) +
geom_point() +
geom_errorbar(aes(ymin = estimates - 2*std_errors, ymax = estimates + 2*std_errors), width = 0.2) +
geom_line(aes(group = 1)) +
geom_text(aes(label = estimates), vjust = -1.5, hjust = 0.5) +
labs(x = "Temperature bins (F)",
y = "Over-65 mortality rate",
title = "Estimated impact of a day in 9 daily mean temperature (F) bins on\nannual over-65 mortality rate, relative to a day in the 60-70F bin\n(including +2 and -2 standard errors)")
```