Skip to content

Commit

Permalink
Updated country params + corrected everything
Browse files Browse the repository at this point in the history
  • Loading branch information
lfagliano committed Nov 14, 2023
1 parent 2a6719e commit d06fcec
Show file tree
Hide file tree
Showing 26 changed files with 100 additions and 424 deletions.
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ export("%>%")
export(acled_access)
export(acled_api)
export(acled_deletions_api)
export(acled_filter_actors)
export(acled_rounding)
export(acled_transform_interaction)
export(acled_transform_longer)
Expand Down
58 changes: 29 additions & 29 deletions R/acled_api.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#' @title Request data from ACLED API
#' @name acled_api
#' @description This function allows users to easily request data from the ACLED API. Users can include variables such as countries, regions, dates of interest and the type of file (monadic or dyadic). The function returns a tibble of the desired ACLED events.
#' @description This function allows users to easily request data from the ACLED API. Users can include variables such as country, regions, dates of interest and the type of file (monadic or dyadic). The function returns a tibble of the desired ACLED events.
#' @param email character string. Email associated with your ACLED account registered at <https://developer.acleddata.com>.
#' @param key character string. Access key associated with your ACLED account registered at <https://developer.acleddata.com>.
#' @param countries character vector. Default is NULL, which will return events for all countries. Pass a vector of country names to retrieve events from specific countries. The list of ACLED country names may be found via acledR::acled_countries.
#' @param regions vector of region names (character) or region codes (numeric). Default is NULL, which will return events for all regions. Pass a vector of regions names or codes to retrieve events from countries within specific regions. The list of ACLED regions may be found via acledR::acled_regions.
#' @param country character vector. Default is NULL, which will return events for all countries. Pass a vector of country names to retrieve events from specific countries. The list of ACLED country names may be found via acledR::acled_countries.
#' @param regions vector of region names (character) or region codes (numeric). Default is NULL, which will return events for all regions. Pass a vector of region names or codes to retrieve events from countries within specific regions. The list of ACLED regions may be found via acledR::acled_regions.
#' @param start_date character string. Format 'yyyy-mm-dd'. The earliest date for which to return events. The default is the first day of the previous calendar year (`floor_date(Sys.Date(), "year") - years(1)`). The earliest date available is `1997-01-01`.
#' @param end_date character string. Format 'yyyy-mm-dd'. The latest date for which to return events. The default is Sys.Date(), which is the current date.
#' @param timestamp numerical or character string. Provide a date or datetime written as either a character string of yyyy-mm-dd or as a numeric Unix timestamp to access all events added or updated after that date.
Expand All @@ -13,20 +13,20 @@
#' @param ... string. Any additional parameters that users would like to add to their API calls (e.g. interaction or ISO)
#' @param acled_access logical. If TRUE (default), you have used the acled_access function and the email and key arguments are not required.
#' @param log logical. If TRUE, it provides a dataframe with the countries and days requested, and how many calls it entails. The dataframe is provided INSTEAD of the normal ACLED dataset.
#' @param prompt logical. If TRUE (default), users will receive an interactive prompt providing information about their call (countries requested, number of country-days, and number of API calls required) and asking if they want to proceed with the call. If FALSE, the call continues without warning, but the call is split and returns a message specifying how many calls are being made.
#' @param prompt logical. If TRUE (default), users will receive an interactive prompt providing information about their call (countries requested, number of estimated events, and number of API calls required) and asking if they want to proceed with the call. If FALSE, the call continues without warning, but the call is split and returns a message specifying how many calls are being made.
#' @returns Returns a tibble of ACLED events.
#' @family API and Access
#' @seealso
#' \itemize{
#' \item ACLED API guide. <https://acleddata.com/acleddatanew//wp-content/uploads/dlm_uploads/2021/11/API-User-Guide_Feb2022.pdf>
#' \item ACLED API guide. <https://apidocs.acleddata.com/>
#' }
#' @examples
#' \dontrun{
#'
#' # Get all the events coded by ACLED in Argentina from 01/01/2022 until 02/01/2022
#' # in dyadic-wide form
#' argen_acled <- acled_api(email = jane.doe.email, key = jane.doe.key,
#' countries = "Argentina", start_date = "2022-01-01", end_date="2022-02-01",
#' country = "Argentina", start_date = "2022-01-01", end_date="2022-02-01",
#' acled_access = FALSE)
#'
#' # tibble with all the events from Argentina where each row is one event.
Expand Down Expand Up @@ -55,7 +55,7 @@

acled_api <- function(email = NULL,
key = NULL,
countries = NULL,
country = NULL,
regions = NULL,
start_date = floor_date(Sys.Date(), "year") - years(1),
end_date = Sys.Date(),
Expand Down Expand Up @@ -83,13 +83,8 @@ acled_api <- function(email = NULL,

# Stoppers for typos ----

if(hasArg("country") | hasArg("Country")){
stop("Country is not a valid option. Please utilize \"countries\"")

}

if(hasArg("Countries")){
stop("Countries is not a valid option. Please utilize \"countries\", without capitalizing")
if(hasArg("Country")){
stop("Country is not a valid option. Please utilize \"country\", without capitalizing ")

}

Expand Down Expand Up @@ -142,8 +137,8 @@ acled_api <- function(email = NULL,
}
key_internal <- paste0("&key=", key)

if(!is.null(countries) & sum(unique(countries) %in% acledR::acled_countries[["country"]]) < length(unique(countries))) {
stop("One or more of the requested countries are not in ACLED's Country list. The full list of countries is available at 'acledR::acled_countries")
if(!is.null(country) & sum(unique(country) %in% acledR::acled_countries[["country"]]) < length(unique(country))) {
stop("One or more of the requested countries are not in ACLED's countries list. The full list of countries is available at 'acledR::acled_countries")
}

# Checking if regions are input incorrectly ----
Expand All @@ -157,17 +152,19 @@ acled_api <- function(email = NULL,


# Setup base data to check how many country-days are being requested
if(!is.null(countries) & is.null(regions)) {
df <- acledR::acled_countries %>%
filter(.data$country %in% countries)
if(!is.null(country) & is.null(regions)) {
test <- country

# Subset acled_multipliers (subset is faster than filter in our case) by relevant country & year

ex1_df <- subset(acledR::acled_multipliers, country %in% countries, select = country:avg_month_bin)
df <- acledR::acled_countries %>%
filter(.data$country %in% test)

# Subset acled_multipliers (subset is faster than filter in our case) by relevant country & year
ex1_df <- subset(acledR::acled_multipliers, country %in% test, select = country:avg_month_bin)
ex1_df <- subset(ex1_df, year <= lubridate::year(end_date) & year >= lubridate::year(start_date))
}

else if(is.null(countries) & !is.null(regions)) {
else if(is.null(country) & !is.null(regions)) {
if(is.numeric(regions)){
regions <- acledR::acled_regions %>%
filter(.data$region %in% regions) %>%
Expand All @@ -182,15 +179,17 @@ acled_api <- function(email = NULL,

}

else if(!is.null(countries) & !is.null(regions)){
else if(!is.null(country) & !is.null(regions)){

if(is.numeric(regions)){
regions <- acledR::acled_regions %>%
filter(.data$region %in% regions) %>%
pull(.data$region_name)}

test <- country

df <- acledR::acled_countries %>%
filter((.data$country %in% countries) | (.data$region %in% regions))
filter((.data$country %in% test) | (.data$region %in% regions))

ex1_df <- subset(acledR::acled_multipliers, country %in% unique(df$country), select = country:avg_month_bin)
ex1_df <- subset(ex1_df, year <= lubridate::year(end_date) & year >= lubridate::year(start_date))
Expand Down Expand Up @@ -249,6 +248,7 @@ acled_api <- function(email = NULL,
ee_events = avg_daily_bin * n_days
)


out <- df %>%
mutate(t_start = lubridate::as_date(start_date_check),
t_end = lubridate::as_date(end_date_check),
Expand All @@ -264,19 +264,18 @@ acled_api <- function(email = NULL,
# Note for how much data is being requested
size_note <- paste("Requesting data for",
length(unique(ex1_df$country)),
"countries.",
"country.",
"Accounting for the requested time period and ACLED coverage dates, this request includes approximately",
format(acled_rounding(sum(ex1_df$ee_events)), big.mark = ","), "events.")

message(size_note)


# Approx how many calls are required with 1 call sized at 600k country-days - Increase in the call size thanks to the more approximate approach.
# Because of my testing, at around 900k the call fails.
# Current ceiling 400k
time_units <- ceiling(sum(ex1_df$ee_events) / 400000)

# Split call into roughly equally sized groups depending on how many country-days are in each country
# This randomly assigns countries into bins
# This randomly assigns countries into bins
out_groups <- split(out, sample(1:time_units, nrow(out), replace = T))

if(log == T){
Expand Down Expand Up @@ -321,7 +320,7 @@ acled_api <- function(email = NULL,
# }

# Where
## Countries
## country

countries_internal <- vector("list", length = length(out_groups))
for(i in 1:length(out_groups)){
Expand Down Expand Up @@ -435,6 +434,7 @@ acled_api <- function(email = NULL,
event_types_internal, ..., "&limit=0")
}


# Loop through the api requests
response <- vector("list", length = length(out_groups))
message("Processing API request")
Expand Down
53 changes: 0 additions & 53 deletions R/acled_filter_actors.R

This file was deleted.

2 changes: 1 addition & 1 deletion R/acled_transform_interaction.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#'
#' # Load data frame
#' argen_acled <- acled_api(email = jane.doe.email, key = jane.doe.key,
#' countries = "Argentina", start_date = "2022-01-01", end_date="2022-02-01",
#' country = "Argentina", start_date = "2022-01-01", end_date="2022-02-01",
#' acled_access = FALSE)
#'
#' # Transform the interactions
Expand Down
8 changes: 6 additions & 2 deletions R/acled_transform_longer.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#' @family Data Manipulation
#' @examples
#' \dontrun{
#' #argen_acled <- acled_api(countries = "Argentina",start_date = "2022-01-01",
#' #argen_acled <- acled_api(country = "Argentina",start_date = "2022-01-01",
#' # end_date="2022-02-01", acled_access = T, prompt = F)
#'
#' #argen_acled_long_actors <- acled_transform_wide_to_long(argen_acled,
Expand Down Expand Up @@ -67,7 +67,11 @@ acled_transform_longer <- function(data,type="full_actors") {
mutate(actor = str_trim(actor)) %>%
pivot_longer(cols = c("inter1", "inter2"),names_to = "inter_type",values_to = "inter") %>%
filter(str_sub(type_of_actor,start=nchar(type_of_actor)) == str_sub(inter_type, start=nchar(inter_type))) %>%
relocate(c("inter_type","inter"),.after="actor")
relocate(c("inter_type","inter"),.after="actor") %>%
# Removing inters when the actor is an assoc_actor_1/2
mutate(inter = case_when(
str_detect(type_of_actor, "assoc_*") ~ NA,
TRUE ~ inter))

message("Be aware, inter1 and inter2 represent the actor type of actor1 and actor2 respectively.")

Expand Down
13 changes: 10 additions & 3 deletions R/acled_transform_wider.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#' @family Data Manipulation
#' @examples
#' \dontrun{
#' #argen_acled <- acled_api(countries = "Argentina",start_date = "2022-01-01",
#' #argen_acled <- acled_api(country = "Argentina",start_date = "2022-01-01",
#' # end_date="2022-02-01", acled_access = T, prompt = F)
#' #argen_acled_long_actors <- acled_transform_longer(argen_acled,
#' # type = "full_actor") # Transforming the data to long form
Expand Down Expand Up @@ -60,12 +60,14 @@ acled_transform_wider <- function(data, type = "full_actors") {
# Transform inter into character for collapsing
mutate(inter1 = as.character(inter1),
inter2 = as.character(inter2)) %>%
mutate(inter1 = replace_na(inter1, ""))%>%
mutate(inter2 = replace_na(inter2, ""))%>%
group_by(across(c(-actor1,-actor2, -inter1, -inter2, -assoc_actor_1, -assoc_actor_2))) %>%
# Collapse repeated inters and actors
summarise(actor1 = str_c(actor1, collapse = ""),
actor2 = str_c(actor2, collapse = ""),
inter1 = str_trim(str_remove_all(str_c(inter1, collapse = " "), "9999")),
inter2 = str_trim(str_remove_all(str_c(inter2, collapse = " "), "9999")),
inter1 = str_trim(str_remove_all(str_c(inter1, collapse = " "), "9999|\\s0\\s")),
inter2 = str_trim(str_remove_all(str_c(inter2, collapse = " "), "9999|\\s0\\s")),
assoc_actor_1 = str_c(assoc_actor_1, collapse = ""),
assoc_actor_2 = str_c(assoc_actor_2, collapse = "")) %>%
ungroup() %>%
Expand All @@ -76,6 +78,7 @@ acled_transform_wider <- function(data, type = "full_actors") {
actor1 = na_if(actor1,""),
assoc_actor_1 = na_if(assoc_actor_1,""),
assoc_actor_2 = na_if(assoc_actor_2,""),
inter1 = replace_na(inter1, 0),
inter2 = replace_na(inter2, 0)) %>%
# Match column structure for an acled dataset
select(names(acledR::acled_old_dummy))
Expand All @@ -99,6 +102,8 @@ acled_transform_wider <- function(data, type = "full_actors") {
# Transform inter into character for collapsing
mutate(inter1 = as.character(inter1),
inter2 = as.character(inter2)) %>%
mutate(inter1 = replace_na(inter1, ""))%>%
mutate(inter2 = replace_na(inter2, ""))%>%
group_by(across(c(-actor1,-actor2, -inter1, -inter2))) %>%
# Collapse repeated inters and actors
summarise(actor1 = str_c(actor1, collapse = ""),
Expand All @@ -114,6 +119,7 @@ acled_transform_wider <- function(data, type = "full_actors") {
actor1 = na_if(actor1,""),
assoc_actor_1 = na_if(assoc_actor_1,""),
assoc_actor_2 = na_if(assoc_actor_2,""),
inter1 = replace_na(inter1, 0),
inter2 = replace_na(inter2, 0))%>%
# Match column structure for an acled dataset
select(names(acledR::acled_old_dummy))
Expand Down Expand Up @@ -141,6 +147,7 @@ acled_transform_wider <- function(data, type = "full_actors") {
actor1 = na_if(actor1,""),
assoc_actor_1 = na_if(assoc_actor_1,""),
assoc_actor_2 = na_if(assoc_actor_2,""),
inter1 = replace_na(inter1, 0),
inter2 = replace_na(inter2, 0))%>%
# Match column structure for an acled dataset
select(names(acledR::acled_old_dummy))
Expand Down
2 changes: 1 addition & 1 deletion R/acled_update.R
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ acled_update <- function(df,
key = key,
start_date = start_date,
end_date = end_date,
countries = additional_countries,
country = additional_countries,
regions = regions,
event_types = event_types,
acled_access = acled_access,
Expand Down
54 changes: 0 additions & 54 deletions R/multipliertest.R

This file was deleted.

Loading

0 comments on commit d06fcec

Please sign in to comment.