-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcovid.R
92 lines (77 loc) · 2.32 KB
/
covid.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
library(renv)
library(dplyr)
library(distill)
library(tidyverse)
library(magrittr)
library(lubridate)
library(plotly)
# Directory that holds the per-day report CSV files.
dir_data <- "~/github/covid/csse_covid_19_data/csse_covid_19_daily_reports"
# Older data is missing latitude and longitude
# Prepare one daily-report data frame so that all reports can be stacked.
#
# Args:
#   df: Data frame for a single daily report. Must contain a `Last Update`
#       column whose values start with a date written either as
#       month/day/year (separated by "/") or as year-month-day.
#
# Returns:
#   `df` with `Latitude` and `Longitude` columns added (filled with NA) when
#   they are absent, plus a `date` column of class Date parsed from
#   `Last Update`. Rows whose timestamp is NA get an NA date.
prep_df <- function(df) {
  stopifnot("Last Update" %in% colnames(df))

  # Reports without coordinates get NA placeholders. Checking by column name
  # (rather than by column count) keeps this working when a report has a
  # different number of unrelated columns.
  for (coord in c("Latitude", "Longitude")) {
    if (!(coord %in% colnames(df))) {
      df[[coord]] <- rep(NA_real_, nrow(df))
    }
  }

  # Keep only the date part of each timestamp by cutting at the first space
  # or "T". A timestamp that uses "T" instead of a space as the date/time
  # separator would otherwise be handed to ymd() with the time attached.
  stamp <- as.character(df[["Last Update"]])
  date_txt <- str_remove(stamp, "[ T].*$")

  # Pick the parser row by row instead of trusting the first row only, so
  # mixed formats, NA timestamps and zero-row inputs are all handled.
  has_slash <- str_detect(date_txt, fixed("/"))
  mdy_rows <- which(has_slash)
  ymd_rows <- which(!has_slash)

  parsed <- rep(as.Date(NA), length(date_txt))
  if (length(mdy_rows) > 0) {
    parsed[mdy_rows] <- mdy(date_txt[mdy_rows])
  }
  if (length(ymd_rows) > 0) {
    parsed[ymd_rows] <- ymd(date_txt[ymd_rows])
  }
  df$date <- parsed

  df
}
# Read every daily report in `dir_data`, normalise each one with prep_df(),
# and stack them into a single data frame `df`.
dfs <- list.files(dir_data, full.names = TRUE, pattern = "\\.csv$") %>%
  lapply(read_csv) %>%
  lapply(prep_df)

# Fail with a clear message instead of a cryptic error further down when the
# directory is missing or holds no CSV files.
if (length(dfs) == 0) {
  stop("No CSV files found in ", dir_data, call. = FALSE)
}

# NOTE(review): rbind() needs every report to carry the same column names and
# coerces columns whose types differ between reports -- confirm the stacked
# `Last Update` column looks sane before relying on it.
df <- do.call(rbind, dfs)

# Make the column names code-friendly: lower case, with "/" and " " replaced
# by "_".
colnames(df) <- colnames(df) %>%
  tolower() %>%
  str_replace_all(fixed("/"), "_") %>%
  str_replace_all(fixed(" "), "_")

# View() opens a viewer window, so only call it from an interactive session.
if (interactive()) {
  View(df)
}
# Todo:
# Group by country/region
# Subtract from prior day
# Get the "new" cases
#
# Line chart of the confirmed count for China: rows with
# country_region == "China" are summed per date.
# The summary is piped directly into ggplot() so the plot receives its data,
# and geom_line() is called without arguments because it has no `type`
# parameter.
df %>%
  filter(country_region == "China") %>%
  group_by(date) %>%
  summarize(confirmed = sum(confirmed)) %>%
  ggplot(aes(x = date, y = confirmed)) +
  geom_line()
# Not sure I trust that data above
# Load the confirmed-cases time series and reshape it from one column per
# date to one row per (province/state, country/region, date).
fp_data <- "~/github/covid/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv"

# Every column other than the four identifier columns is treated as a date
# column: its header goes to `date_raw` and its cell value to `confirmed`.
# `date` is `date_raw` parsed as month/day/year.
dfc <- read_csv(fp_data) %>%
  pivot_longer(
    cols = -c(`Province/State`, `Country/Region`, Lat, Long),
    names_to = "date_raw",
    values_to = "confirmed"
  ) %>%
  mutate(date = mdy(date_raw))

# Make the column names code-friendly: lower case, with "/" and " " replaced
# by "_".
colnames(dfc) <- colnames(dfc) %>%
  tolower() %>%
  str_replace_all(fixed("/"), "_") %>%
  str_replace_all(fixed(" "), "_")

# Print the reshaped table for a quick visual check.
dfc
# Line chart of the confirmed count for the US: rows with
# country_region == "US" are summed per date.
# geom_line() has no `type` parameter, so it is called without arguments.
dfc %>%
  filter(country_region == "US") %>%
  group_by(date) %>%
  summarize(confirmed = sum(confirmed)) %>%
  ggplot(aes(x = date, y = confirmed)) +
  geom_line() +
  ggtitle("US")
# Days since the first confirmed case, per country.
#
# Steps:
#   1. Sum `confirmed` over all rows sharing a country_region and date.
#   2. Drop the dates on which that total is not above zero.
#   3. Number the remaining rows of each country 1, 2, 3, ... in date order
#      (`day_since1`). This relies on summarise() leaving the result grouped
#      by country_region, which is why ungroup() only comes afterwards.
#   4. Wrap the result with highlight_key() keyed on country_region.
df_tmp <- dfc %>%
  group_by(country_region, date) %>%
  summarise(confirmed = sum(confirmed)) %>%
  filter(confirmed > 0) %>%
  arrange(country_region, date) %>%
  mutate(day_since1 = seq_len(n())) %>%
  ungroup() %>%
  # Optional restriction to a handful of countries, kept for reference:
  # filter(country_region %in% c("China", 'US', 'Italy', 'France', 'Iran')) %>% #View
  highlight_key(~country_region)
# One line per country: confirmed count against `day_since1`. The legend is
# hidden.
p <- ggplot(
  data = df_tmp,
  mapping = aes(x = day_since1, y = confirmed, colour = country_region)
) +
  geom_line() +
  theme(legend.position = "none") +
  labs(title = "Days Since first onset")
# Convert the ggplot to a plotly widget and enable highlighting on hover,
# with a selectize box, dynamic highlighting and three countries
# pre-selected.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
gg <- ggplotly(p)
highlight(
  gg,
  on = "plotly_hover",
  selectize = TRUE,
  dynamic = TRUE,
  defaultValues = c("China", "Italy", "US")
)