diff --git a/.Rbuildignore b/.Rbuildignore old mode 100644 new mode 100755 index 91114bf..ad352f1 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,2 +1,22 @@ ^.*\.Rproj$ ^\.Rproj\.user$ +^_pkgdown\.yml$ +^docs$ +^pkgdown$ +^pkgdown/$ +^doc$ +^Meta$ +^\.github$ +^codecov\.yml$ +^vignettes/articles$ +^LICENSE\.md$ +^README.Rmd +^test.R +^.trackdown +^trackdown_uploads.R +.covrignore +acled_analysis.Rmd +acled_generate_counts.R +acled_generate_movers.R +acled_report_api.R +acled_actor_concentration.R diff --git a/.covrignore b/.covrignore new file mode 100644 index 0000000..43b4bbd --- /dev/null +++ b/.covrignore @@ -0,0 +1,5 @@ +R/acled_help.R +R/acled_generate_counts.R +R/acled_generate_movers.R +R/acled_actor_concentration.R +R/acled_report_api.R diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100755 index 0000000..2d19fc7 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 0000000..e4f065c --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,51 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: R-CMD-check + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + EMAIL_ADDRESS_EXAMPLES: ${{ secrets.EMAIL_ADDRESS_EXAMPLES}} + EXAMPLES_KEY: ${{ secrets.EXAMPLES_KEY}} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true \ No newline at end of file diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml new file mode 100755 index 0000000..691121e --- /dev/null +++ b/.github/workflows/pkgdown.yaml @@ -0,0 +1,49 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + release: + types: [published] + workflow_dispatch: + +name: pkgdown + +jobs: + pkgdown: + runs-on: ubuntu-latest + # Only restrict concurrency for non-PR jobs + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::pkgdown, local::. + needs: website + + - name: load environment variables + run: | + echo "EMAIL_ADDRESS_EXAMPLES=${{ secrets.EMAIL_ADDRESS_EXAMPLES}}" >> .Renviron + echo "EXAMPLES_KEY=${{ secrets.EXAMPLES_KEY}}" >> .Renviron + + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + + - name: Deploy to GitHub pages 🚀 + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@v4.4.1 + with: + clean: false + branch: gh-pages + folder: docs diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml new file mode 100644 index 0000000..8be7285 --- /dev/null +++ b/.github/workflows/test-coverage.yaml @@ -0,0 +1,52 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: test-coverage + +jobs: + test-coverage: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + EMAIL_ADDRESS_EXAMPLES: ${{ secrets.EMAIL_ADDRESS_EXAMPLES}} + EXAMPLES_KEY: ${{ secrets.EXAMPLES_KEY}} + + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::covr + needs: coverage + + - name: Test coverage + run: | + covr::codecov( + quiet = FALSE, + clean = FALSE, + install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package") + ) + shell: Rscript {0} + + - name: Show testthat output + if: always() + run: | + ## -------------------------------------------------------------------- + find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true + shell: bash + + - name: Upload test results + if: failure() + uses: actions/upload-artifact@v3 + with: + name: coverage-test-failures + path: ${{ runner.temp }}/package diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 index 5b6a065..65c8cfe --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,7 @@ .Rhistory .RData .Ruserdata +.Rprofile +.RDataTmp +.Renviron +.DS_Store diff --git a/DESCRIPTION b/DESCRIPTION index 990d92c..464eeba 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,20 +1,67 @@ Package: acledR Type: Package -Title: Manipulate ACLED Data +Title: Manipulate 'ACLED' Data Version: 0.1.0 -Author: Trey Billing -Maintainer: Trey Billing -Description: More about what it does (maybe more than one line) - Use four spaces when indenting paragraphs within the Description. -License: What license is it under? 
+Authors@R: c( + person(given = "Lucas", + family = "Fagliano", + role = "aut", + email = "l.fagliano@acleddata.com"), + person(given = "Trey", + family = "Billing", + role = "aut", + email = "t.billing@acleddata.com"), + person(given = "Rachel", + family = "Goodman", + role = "aut", + email = "goodman.r@wustl.edu"), + person(given = "Katayoun", + family = "Kishi", + role = "aut", + email = "k.kishi@acleddata.com"), + person(given = "Michael", + family = "Start", + role = "aut", + email = "m.start@acleddata.com"), + person(given = "", + family = "ACLED", + role = c("cph", "cre"), + email ="data@acleddata.com")) +Description: The package allows users to easily interact with 'ACLED' data by providing wrappers for the API and other functions to manipulate 'ACLED' data. +License: GPL (>= 3) Encoding: UTF-8 LazyData: true Imports: dplyr, + methods, + httr, lubridate, + stringr, + tidyr, + magrittr, purrr, - slider, - tidyr -RoxygenNote: 7.1.1 + rlang, + utils +RoxygenNote: 7.2.3 Depends: - R (>= 2.10) + R (>= 3.5.0) +URL: https://github.com/ACLED/acledR, https://acled.github.io/acledR/ +BugReports: https://github.com/ACLED/acledR/issues +Suggests: + knitr, + janitor, + rmarkdown, + readr, + kableExtra, + ggplot2, + covr, + here, + secret, + sf, + raster, + forcats, + igraph, + sjmisc, + testthat (>= 3.0.0) +VignetteBuilder: knitr +Config/testthat/edition: 3 diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..175443c --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,595 @@ +GNU General Public License +========================== + +_Version 3, 29 June 2007_ +_Copyright © 2007 Free Software Foundation, Inc. <https://fsf.org/>_ + +Everyone is permitted to copy and distribute verbatim copies of this license +document, but changing it is not allowed. + +## Preamble + +The GNU General Public License is a free, copyleft license for software and other +kinds of works. 
+ +The licenses for most software and other practical works are designed to take away +your freedom to share and change the works. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change all versions of a +program--to make sure it remains free software for all its users. We, the Free +Software Foundation, use the GNU General Public License for most of our software; it +applies also to any other work released this way by its authors. You can apply it to +your programs, too. + +When we speak of free software, we are referring to freedom, not price. Our General +Public Licenses are designed to make sure that you have the freedom to distribute +copies of free software (and charge for them if you wish), that you receive source +code or can get it if you want it, that you can change the software or use pieces of +it in new free programs, and that you know you can do these things. + +To protect your rights, we need to prevent others from denying you these rights or +asking you to surrender the rights. Therefore, you have certain responsibilities if +you distribute copies of the software, or if you modify it: responsibilities to +respect the freedom of others. + +For example, if you distribute copies of such a program, whether gratis or for a fee, +you must pass on to the recipients the same freedoms that you received. You must make +sure that they, too, receive or can get the source code. And you must show them these +terms so they know their rights. + +Developers that use the GNU GPL protect your rights with two steps: **(1)** assert +copyright on the software, and **(2)** offer you this License giving you legal permission +to copy, distribute and/or modify it. + +For the developers' and authors' protection, the GPL clearly explains that there is +no warranty for this free software. 
For both users' and authors' sake, the GPL +requires that modified versions be marked as changed, so that their problems will not +be attributed erroneously to authors of previous versions. + +Some devices are designed to deny users access to install or run modified versions of +the software inside them, although the manufacturer can do so. This is fundamentally +incompatible with the aim of protecting users' freedom to change the software. The +systematic pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we have designed +this version of the GPL to prohibit the practice for those products. If such problems +arise substantially in other domains, we stand ready to extend this provision to +those domains in future versions of the GPL, as needed to protect the freedom of +users. + +Finally, every program is threatened constantly by software patents. States should +not allow patents to restrict development and use of software on general-purpose +computers, but in those that do, we wish to avoid the special danger that patents +applied to a free program could make it effectively proprietary. To prevent this, the +GPL assures that patents cannot be used to render the program non-free. + +The precise terms and conditions for copying, distribution and modification follow. + +## TERMS AND CONDITIONS + +### 0. Definitions + +“This License” refers to version 3 of the GNU General Public License. + +“Copyright” also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + +“The Program” refers to any copyrightable work licensed under this +License. Each licensee is addressed as “you”. “Licensees” and +“recipients” may be individuals or organizations. + +To “modify” a work means to copy from or adapt all or part of the work in +a fashion requiring copyright permission, other than the making of an exact copy. 
The +resulting work is called a “modified version” of the earlier work or a +work “based on” the earlier work. + +A “covered work” means either the unmodified Program or a work based on +the Program. + +To “propagate” a work means to do anything with it that, without +permission, would make you directly or secondarily liable for infringement under +applicable copyright law, except executing it on a computer or modifying a private +copy. Propagation includes copying, distribution (with or without modification), +making available to the public, and in some countries other activities as well. + +To “convey” a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through a computer +network, with no transfer of a copy, is not conveying. + +An interactive user interface displays “Appropriate Legal Notices” to the +extent that it includes a convenient and prominently visible feature that **(1)** +displays an appropriate copyright notice, and **(2)** tells the user that there is no +warranty for the work (except to the extent that warranties are provided), that +licensees may convey the work under this License, and how to view a copy of this +License. If the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + +### 1. Source Code + +The “source code” for a work means the preferred form of the work for +making modifications to it. “Object code” means any non-source form of a +work. + +A “Standard Interface” means an interface that either is an official +standard defined by a recognized standards body, or, in the case of interfaces +specified for a particular programming language, one that is widely used among +developers working in that language. 
+ +The “System Libraries” of an executable work include anything, other than +the work as a whole, that **(a)** is included in the normal form of packaging a Major +Component, but which is not part of that Major Component, and **(b)** serves only to +enable use of the work with that Major Component, or to implement a Standard +Interface for which an implementation is available to the public in source code form. +A “Major Component”, in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system (if any) on which +the executable work runs, or a compiler used to produce the work, or an object code +interpreter used to run it. + +The “Corresponding Source” for a work in object code form means all the +source code needed to generate, install, and (for an executable work) run the object +code and to modify the work, including scripts to control those activities. However, +it does not include the work's System Libraries, or general-purpose tools or +generally available free programs which are used unmodified in performing those +activities but which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for the work, and +the source code for shared libraries and dynamically linked subprograms that the work +is specifically designed to require, such as by intimate data communication or +control flow between those subprograms and other parts of the work. + +The Corresponding Source need not include anything that users can regenerate +automatically from other parts of the Corresponding Source. + +The Corresponding Source for a work in source code form is that same work. + +### 2. Basic Permissions + +All rights granted under this License are granted for the term of copyright on the +Program, and are irrevocable provided the stated conditions are met. This License +explicitly affirms your unlimited permission to run the unmodified Program. 
The +output from running a covered work is covered by this License only if the output, +given its content, constitutes a covered work. This License acknowledges your rights +of fair use or other equivalent, as provided by copyright law. + +You may make, run and propagate covered works that you do not convey, without +conditions so long as your license otherwise remains in force. You may convey covered +works to others for the sole purpose of having them make modifications exclusively +for you, or provide you with facilities for running those works, provided that you +comply with the terms of this License in conveying all material for which you do not +control copyright. Those thus making or running the covered works for you must do so +exclusively on your behalf, under your direction and control, on terms that prohibit +them from making any copies of your copyrighted material outside their relationship +with you. + +Conveying under any other circumstances is permitted solely under the conditions +stated below. Sublicensing is not allowed; section 10 makes it unnecessary. + +### 3. Protecting Users' Legal Rights From Anti-Circumvention Law + +No covered work shall be deemed part of an effective technological measure under any +applicable law fulfilling obligations under article 11 of the WIPO copyright treaty +adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention +of such measures. + +When you convey a covered work, you waive any legal power to forbid circumvention of +technological measures to the extent such circumvention is effected by exercising +rights under this License with respect to the covered work, and you disclaim any +intention to limit operation or modification of the work as a means of enforcing, +against the work's users, your or third parties' legal rights to forbid circumvention +of technological measures. + +### 4. 
Conveying Verbatim Copies + +You may convey verbatim copies of the Program's source code as you receive it, in any +medium, provided that you conspicuously and appropriately publish on each copy an +appropriate copyright notice; keep intact all notices stating that this License and +any non-permissive terms added in accord with section 7 apply to the code; keep +intact all notices of the absence of any warranty; and give all recipients a copy of +this License along with the Program. + +You may charge any price or no price for each copy that you convey, and you may offer +support or warranty protection for a fee. + +### 5. Conveying Modified Source Versions + +You may convey a work based on the Program, or the modifications to produce it from +the Program, in the form of source code under the terms of section 4, provided that +you also meet all of these conditions: + +* **a)** The work must carry prominent notices stating that you modified it, and giving a +relevant date. +* **b)** The work must carry prominent notices stating that it is released under this +License and any conditions added under section 7. This requirement modifies the +requirement in section 4 to “keep intact all notices”. +* **c)** You must license the entire work, as a whole, under this License to anyone who +comes into possession of a copy. This License will therefore apply, along with any +applicable section 7 additional terms, to the whole of the work, and all its parts, +regardless of how they are packaged. This License gives no permission to license the +work in any other way, but it does not invalidate such permission if you have +separately received it. +* **d)** If the work has interactive user interfaces, each must display Appropriate Legal +Notices; however, if the Program has interactive interfaces that do not display +Appropriate Legal Notices, your work need not make them do so. 
+ +A compilation of a covered work with other separate and independent works, which are +not by their nature extensions of the covered work, and which are not combined with +it such as to form a larger program, in or on a volume of a storage or distribution +medium, is called an “aggregate” if the compilation and its resulting +copyright are not used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work in an aggregate +does not cause this License to apply to the other parts of the aggregate. + +### 6. Conveying Non-Source Forms + +You may convey a covered work in object code form under the terms of sections 4 and +5, provided that you also convey the machine-readable Corresponding Source under the +terms of this License, in one of these ways: + +* **a)** Convey the object code in, or embodied in, a physical product (including a +physical distribution medium), accompanied by the Corresponding Source fixed on a +durable physical medium customarily used for software interchange. +* **b)** Convey the object code in, or embodied in, a physical product (including a +physical distribution medium), accompanied by a written offer, valid for at least +three years and valid for as long as you offer spare parts or customer support for +that product model, to give anyone who possesses the object code either **(1)** a copy of +the Corresponding Source for all the software in the product that is covered by this +License, on a durable physical medium customarily used for software interchange, for +a price no more than your reasonable cost of physically performing this conveying of +source, or **(2)** access to copy the Corresponding Source from a network server at no +charge. +* **c)** Convey individual copies of the object code with a copy of the written offer to +provide the Corresponding Source. 
This alternative is allowed only occasionally and +noncommercially, and only if you received the object code with such an offer, in +accord with subsection 6b. +* **d)** Convey the object code by offering access from a designated place (gratis or for +a charge), and offer equivalent access to the Corresponding Source in the same way +through the same place at no further charge. You need not require recipients to copy +the Corresponding Source along with the object code. If the place to copy the object +code is a network server, the Corresponding Source may be on a different server +(operated by you or a third party) that supports equivalent copying facilities, +provided you maintain clear directions next to the object code saying where to find +the Corresponding Source. Regardless of what server hosts the Corresponding Source, +you remain obligated to ensure that it is available for as long as needed to satisfy +these requirements. +* **e)** Convey the object code using peer-to-peer transmission, provided you inform +other peers where the object code and Corresponding Source of the work are being +offered to the general public at no charge under subsection 6d. + +A separable portion of the object code, whose source code is excluded from the +Corresponding Source as a System Library, need not be included in conveying the +object code work. + +A “User Product” is either **(1)** a “consumer product”, which +means any tangible personal property which is normally used for personal, family, or +household purposes, or **(2)** anything designed or sold for incorporation into a +dwelling. In determining whether a product is a consumer product, doubtful cases +shall be resolved in favor of coverage. 
For a particular product received by a +particular user, “normally used” refers to a typical or common use of +that class of product, regardless of the status of the particular user or of the way +in which the particular user actually uses, or expects or is expected to use, the +product. A product is a consumer product regardless of whether the product has +substantial commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + +“Installation Information” for a User Product means any methods, +procedures, authorization keys, or other information required to install and execute +modified versions of a covered work in that User Product from a modified version of +its Corresponding Source. The information must suffice to ensure that the continued +functioning of the modified object code is in no case prevented or interfered with +solely because modification has been made. + +If you convey an object code work under this section in, or with, or specifically for +use in, a User Product, and the conveying occurs as part of a transaction in which +the right of possession and use of the User Product is transferred to the recipient +in perpetuity or for a fixed term (regardless of how the transaction is +characterized), the Corresponding Source conveyed under this section must be +accompanied by the Installation Information. But this requirement does not apply if +neither you nor any third party retains the ability to install modified object code +on the User Product (for example, the work has been installed in ROM). + +The requirement to provide Installation Information does not include a requirement to +continue to provide support service, warranty, or updates for a work that has been +modified or installed by the recipient, or for the User Product in which it has been +modified or installed. 
Access to a network may be denied when the modification itself +materially and adversely affects the operation of the network or violates the rules +and protocols for communication across the network. + +Corresponding Source conveyed, and Installation Information provided, in accord with +this section must be in a format that is publicly documented (and with an +implementation available to the public in source code form), and must require no +special password or key for unpacking, reading or copying. + +### 7. Additional Terms + +“Additional permissions” are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. Additional +permissions that are applicable to the entire Program shall be treated as though they +were included in this License, to the extent that they are valid under applicable +law. If additional permissions apply only to part of the Program, that part may be +used separately under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + +When you convey a copy of a covered work, you may at your option remove any +additional permissions from that copy, or from any part of it. (Additional +permissions may be written to require their own removal in certain cases when you +modify the work.) You may place additional permissions on material, added by you to a +covered work, for which you have or can give appropriate copyright permission. 
+ +Notwithstanding any other provision of this License, for material you add to a +covered work, you may (if authorized by the copyright holders of that material) +supplement the terms of this License with terms: + +* **a)** Disclaiming warranty or limiting liability differently from the terms of +sections 15 and 16 of this License; or +* **b)** Requiring preservation of specified reasonable legal notices or author +attributions in that material or in the Appropriate Legal Notices displayed by works +containing it; or +* **c)** Prohibiting misrepresentation of the origin of that material, or requiring that +modified versions of such material be marked in reasonable ways as different from the +original version; or +* **d)** Limiting the use for publicity purposes of names of licensors or authors of the +material; or +* **e)** Declining to grant rights under trademark law for use of some trade names, +trademarks, or service marks; or +* **f)** Requiring indemnification of licensors and authors of that material by anyone +who conveys the material (or modified versions of it) with contractual assumptions of +liability to the recipient, for any liability that these contractual assumptions +directly impose on those licensors and authors. + +All other non-permissive additional terms are considered “further +restrictions” within the meaning of section 10. If the Program as you received +it, or any part of it, contains a notice stating that it is governed by this License +along with a term that is a further restriction, you may remove that term. If a +license document contains a further restriction but permits relicensing or conveying +under this License, you may add to a covered work material governed by the terms of +that license document, provided that the further restriction does not survive such +relicensing or conveying. 
+ +If you add terms to a covered work in accord with this section, you must place, in +the relevant source files, a statement of the additional terms that apply to those +files, or a notice indicating where to find the applicable terms. + +Additional terms, permissive or non-permissive, may be stated in the form of a +separately written license, or stated as exceptions; the above requirements apply +either way. + +### 8. Termination + +You may not propagate or modify a covered work except as expressly provided under +this License. Any attempt otherwise to propagate or modify it is void, and will +automatically terminate your rights under this License (including any patent licenses +granted under the third paragraph of section 11). + +However, if you cease all violation of this License, then your license from a +particular copyright holder is reinstated **(a)** provisionally, unless and until the +copyright holder explicitly and finally terminates your license, and **(b)** permanently, +if the copyright holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + +Moreover, your license from a particular copyright holder is reinstated permanently +if the copyright holder notifies you of the violation by some reasonable means, this +is the first time you have received notice of violation of this License (for any +work) from that copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + +Termination of your rights under this section does not terminate the licenses of +parties who have received copies or rights from you under this License. If your +rights have been terminated and not permanently reinstated, you do not qualify to +receive new licenses for the same material under section 10. + +### 9. Acceptance Not Required for Having Copies + +You are not required to accept this License in order to receive or run a copy of the +Program. 
Ancillary propagation of a covered work occurring solely as a consequence of +using peer-to-peer transmission to receive a copy likewise does not require +acceptance. However, nothing other than this License grants you permission to +propagate or modify any covered work. These actions infringe copyright if you do not +accept this License. Therefore, by modifying or propagating a covered work, you +indicate your acceptance of this License to do so. + +### 10. Automatic Licensing of Downstream Recipients + +Each time you convey a covered work, the recipient automatically receives a license +from the original licensors, to run, modify and propagate that work, subject to this +License. You are not responsible for enforcing compliance by third parties with this +License. + +An “entity transaction” is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an organization, or +merging organizations. If propagation of a covered work results from an entity +transaction, each party to that transaction who receives a copy of the work also +receives whatever licenses to the work the party's predecessor in interest had or +could give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if the predecessor +has it or can get it with reasonable efforts. + +You may not impose any further restrictions on the exercise of the rights granted or +affirmed under this License. For example, you may not impose a license fee, royalty, +or other charge for exercise of rights granted under this License, and you may not +initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging +that any patent claim is infringed by making, using, selling, offering for sale, or +importing the Program or any portion of it. + +### 11. 
Patents + +A “contributor” is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The work thus +licensed is called the contributor's “contributor version”. + +A contributor's “essential patent claims” are all patent claims owned or +controlled by the contributor, whether already acquired or hereafter acquired, that +would be infringed by some manner, permitted by this License, of making, using, or +selling its contributor version, but do not include claims that would be infringed +only as a consequence of further modification of the contributor version. For +purposes of this definition, “control” includes the right to grant patent +sublicenses in a manner consistent with the requirements of this License. + +Each contributor grants you a non-exclusive, worldwide, royalty-free patent license +under the contributor's essential patent claims, to make, use, sell, offer for sale, +import and otherwise run, modify and propagate the contents of its contributor +version. + +In the following three paragraphs, a “patent license” is any express +agreement or commitment, however denominated, not to enforce a patent (such as an +express permission to practice a patent or covenant not to sue for patent +infringement). To “grant” such a patent license to a party means to make +such an agreement or commitment not to enforce a patent against the party. 
+ +If you convey a covered work, knowingly relying on a patent license, and the +Corresponding Source of the work is not available for anyone to copy, free of charge +and under the terms of this License, through a publicly available network server or +other readily accessible means, then you must either **(1)** cause the Corresponding +Source to be so available, or **(2)** arrange to deprive yourself of the benefit of the +patent license for this particular work, or **(3)** arrange, in a manner consistent with +the requirements of this License, to extend the patent license to downstream +recipients. “Knowingly relying” means you have actual knowledge that, but +for the patent license, your conveying the covered work in a country, or your +recipient's use of the covered work in a country, would infringe one or more +identifiable patents in that country that you have reason to believe are valid. + +If, pursuant to or in connection with a single transaction or arrangement, you +convey, or propagate by procuring conveyance of, a covered work, and grant a patent +license to some of the parties receiving the covered work authorizing them to use, +propagate, modify or convey a specific copy of the covered work, then the patent +license you grant is automatically extended to all recipients of the covered work and +works based on it. + +A patent license is “discriminatory” if it does not include within the +scope of its coverage, prohibits the exercise of, or is conditioned on the +non-exercise of one or more of the rights that are specifically granted under this +License. 
You may not convey a covered work if you are a party to an arrangement with +a third party that is in the business of distributing software, under which you make +payment to the third party based on the extent of your activity of conveying the +work, and under which the third party grants, to any of the parties who would receive +the covered work from you, a discriminatory patent license **(a)** in connection with +copies of the covered work conveyed by you (or copies made from those copies), or **(b)** +primarily for and in connection with specific products or compilations that contain +the covered work, unless you entered into that arrangement, or that patent license +was granted, prior to 28 March 2007. + +Nothing in this License shall be construed as excluding or limiting any implied +license or other defenses to infringement that may otherwise be available to you +under applicable patent law. + +### 12. No Surrender of Others' Freedom + +If conditions are imposed on you (whether by court order, agreement or otherwise) +that contradict the conditions of this License, they do not excuse you from the +conditions of this License. If you cannot convey a covered work so as to satisfy +simultaneously your obligations under this License and any other pertinent +obligations, then as a consequence you may not convey it at all. For example, if you +agree to terms that obligate you to collect a royalty for further conveying from +those to whom you convey the Program, the only way you could satisfy both those terms +and this License would be to refrain entirely from conveying the Program. + +### 13. Use with the GNU Affero General Public License + +Notwithstanding any other provision of this License, you have permission to link or +combine any covered work with a work licensed under version 3 of the GNU Affero +General Public License into a single combined work, and to convey the resulting work. 
+The terms of this License will continue to apply to the part which is the covered +work, but the special requirements of the GNU Affero General Public License, section +13, concerning interaction through a network will apply to the combination as such. + +### 14. Revised Versions of this License + +The Free Software Foundation may publish revised and/or new versions of the GNU +General Public License from time to time. Such new versions will be similar in spirit +to the present version, but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Program specifies that +a certain numbered version of the GNU General Public License “or any later +version” applies to it, you have the option of following the terms and +conditions either of that numbered version or of any later version published by the +Free Software Foundation. If the Program does not specify a version number of the GNU +General Public License, you may choose any version ever published by the Free +Software Foundation. + +If the Program specifies that a proxy can decide which future versions of the GNU +General Public License can be used, that proxy's public statement of acceptance of a +version permanently authorizes you to choose that version for the Program. + +Later license versions may give you additional or different permissions. However, no +additional obligations are imposed on any author or copyright holder as a result of +your choosing to follow a later version. + +### 15. Disclaimer of Warranty + +THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM “AS IS” WITHOUT WARRANTY OF ANY KIND, EITHER +EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
THE ENTIRE RISK AS TO THE +QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE +DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +### 16. Limitation of Liability + +IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY +COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS +PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, +INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE +OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE +WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + +### 17. Interpretation of Sections 15 and 16 + +If the disclaimer of warranty and limitation of liability provided above cannot be +given local legal effect according to their terms, reviewing courts shall apply local +law that most closely approximates an absolute waiver of all civil liability in +connection with the Program, unless a warranty or assumption of liability accompanies +a copy of the Program in return for a fee. + +_END OF TERMS AND CONDITIONS_ + +## How to Apply These Terms to Your New Programs + +If you develop a new program, and you want it to be of the greatest possible use to +the public, the best way to achieve this is to make it free software which everyone +can redistribute and change under these terms. + +To do so, attach the following notices to the program. It is safest to attach them +to the start of each source file to most effectively state the exclusion of warranty; +and each file should have at least the “copyright” line and a pointer to +where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + +If the program does terminal interaction, make it output a short notice like this +when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type 'show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type 'show c' for details. + +The hypothetical commands `show w` and `show c` should show the appropriate parts of +the General Public License. Of course, your program's commands might be different; +for a GUI interface, you would use an “about box”. + +You should also get your employer (if you work as a programmer) or school, if any, to +sign a “copyright disclaimer” for the program, if necessary. For more +information on this, and how to apply and follow the GNU GPL, see +<https://www.gnu.org/licenses/>. + +The GNU General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may consider it +more useful to permit linking proprietary applications with the library. If this is +what you want to do, use the GNU Lesser General Public License instead of this +License. But first, please read +<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/NAMESPACE b/NAMESPACE old mode 100644 new mode 100755 index d75f824..5693112 --- a/NAMESPACE +++ b/NAMESPACE @@ -1 +1,36 @@ -exportPattern("^[[:alpha:]]+") +# Generated by roxygen2: do not edit by hand + +export("%>%") +export(acled_access) +export(acled_api) +export(acled_deletions_api) +export(acled_rounding) +export(acled_transform_interaction) +export(acled_transform_longer) +export(acled_transform_wider) +export(acled_update) +import(dplyr) +import(httr) +import(lubridate) +import(purrr) +import(stringr) +importFrom(dplyr,anti_join) +importFrom(dplyr,filter) +importFrom(dplyr,left_join) +importFrom(dplyr,mutate) +importFrom(dplyr,relocate) +importFrom(dplyr,rename) +importFrom(dplyr,select) +importFrom(magrittr,"%>%") +importFrom(methods,hasArg) +importFrom(rlang,.data) +importFrom(stringr,str_c) +importFrom(stringr,str_detect) +importFrom(stringr,str_replace) +importFrom(stringr,str_replace_all) +importFrom(stringr,str_trim) +importFrom(tidyr,pivot_longer) +importFrom(tidyr,pivot_wider) +importFrom(tidyr,replace_na) +importFrom(tidyr,separate_rows) +importFrom(utils,menu) diff --git a/R/acledR-package.R b/R/acledR-package.R new file mode 100644 index 0000000..656f240 --- /dev/null +++ b/R/acledR-package.R @@ -0,0 +1,12 @@ +#' @keywords internal +"_PACKAGE" + +## usethis namespace: start +## usethis namespace: end +NULL + + +utils::globalVariables(c("inter1", "assoc_actor_1", "inter2", "assoc_actor_2", + "actor", "type_of_actor", "inter_type", "assoc_actor", + "type_of_assoc_actor", "event_id_cnty", "admin3", "actor1", "actor2", "inter", + "country", "avg_month_bin", "avg_daily_bin", "n_days")) diff --git a/R/acled_access.R b/R/acled_access.R new file mode 100755 index 0000000..647e959 --- /dev/null +++ b/R/acled_access.R @@ -0,0 +1,45 @@ +#' @name acled_access +#' @title Store your ACLED access information into your session. 
#' @description Authenticates an email/key pair against the ACLED API and, on
#'   success, stores both in the session environment (through `Sys.setenv()`)
#'   as `acled_email` and `acled_key`. If email and key are stored via
#'   acled_access, the email and key arguments for acled_api() can be NULL.
#' @param email character string. This is the email that you register in the ACLED Access portal (https://developer.acleddata.com/)
#' @param key character string. This is the key generated by the ACLED Access portal.
#' @returns Invisibly returns `TRUE` once both environment variables are set.
#'   Called for its side effects; stops with an error when the credentials are
#'   malformed or rejected by the API.
#' @family API and Access
#' @examples
#' \dontrun{
#' acled_access(email = "your_email", key = "your_key")
#' Sys.getenv("acled_email")
#' Sys.getenv("acled_key")
#' }
#' @seealso ACLED API Access guide
#' @export
#' @md


acled_access <- function(email, key) {
  # Fail fast on malformed credentials instead of sending a meaningless request.
  if (!is.character(email) || length(email) != 1 || is.na(email) || !nzchar(email)) {
    stop("'email' must be a single, non-empty character string.", call. = FALSE)
  }
  if (!is.character(key) || length(key) != 1 || is.na(key) || !nzchar(key)) {
    stop("'key' must be a single, non-empty character string.", call. = FALSE)
  }

  url <- paste0("https://api.acleddata.com/check/read/?email=", email, "&", "key=", key)

  response <- httr::GET(url)
  out <- httr::content(response)

  # The code below reads `status` and `error$message` from the parsed body.
  # Guard against bodies that do not have that shape so the user gets a
  # readable error rather than "argument is of length zero".
  if (!is.list(out)) {
    out <- list()
  }
  status <- out[["status"]]
  if (is.null(status)) {
    status <- httr::status_code(response)
  }
  api_error <- out[["error"]]
  error_message <- if (is.list(api_error)) api_error[["message"]] else NULL
  if (is.null(error_message)) {
    error_message <- "Unexpected response from the ACLED API"
  }

  if (status != 200) {
    if (error_message == "Incorrect email or access key entered. Please try again.") {
      stop(paste0(
        "Error: ", error_message, ". Error code: ", status, ". \n",
        rlang::format_error_bullets(c(
          "Key and email not authorized. Please verify your API credentials (key and email) and try again",
          "If the error persists please contact access@acleddata.com."
        ))
      ))
    }
    stop(paste0("Error: ", error_message, ". Error code: ", status))
  }

  message("Success! Credentials authorized")

  # Store the validated credentials for later acled_api() calls in this session.
  invisible(all(Sys.setenv(acled_email = email, acled_key = key)))
}
# diff --git a/R/acled_api.R b/R/acled_api.R new file mode 100755 index 0000000..1021328 --- /dev/null +++ b/R/acled_api.R @@ -0,0 +1,495 @@
#' @title Request data from ACLED API
#' @name acled_api
#' @description This function allows users to easily request data from the ACLED API. Users can include variables such as country, regions, dates of interest and the format (monadic or dyadic). The function returns a tibble of the desired ACLED events.
#' @param email character string.
#'   Email associated with your ACLED account registered at <https://developer.acleddata.com>.
#' @param key character string. Access key associated with your ACLED account registered at <https://developer.acleddata.com>.
#' @param country character vector. Default is NULL, which will return events for all countries. Pass a vector of country names to retrieve events from specific countries. The list of ACLED country names may be found via acledR::acled_countries.
#' @param regions vector of region names (character) or region codes (numeric). Default is NULL, which will return events for all regions. Pass a vector of region names or codes to retrieve events from countries within specific regions. The list of ACLED regions may be found via acledR::acled_regions.
#' @param start_date character string. Format 'yyyy-mm-dd'. The earliest date for which to return events. The default is January 1 of the previous calendar year (`floor_date(Sys.Date(), "year") - years(1)`). Passing NULL falls back to `1997-01-01`, which is the earliest date available.
#' @param end_date character string. Format 'yyyy-mm-dd'. The latest date for which to return events. The default is Sys.Date(), i.e. today. Passing NULL also falls back to Sys.Date().
#' @param timestamp numerical or character string. Provide a date or datetime written as either a character string of yyyy-mm-dd or as a numeric Unix timestamp to access all events added or updated after that date.
#' @param event_types vector of one or more event types (character). Default is NULL, which will return data for all event types. To return data for only specific event types, request one or more of the following options (not case sensitive): Battles, Violence against civilians, Protests, Riots, Strategic Developments, and Explosions/Remote violence.
#' @param population character. Specify whether to return population estimates for each event. It accepts three options: "none" (default), "best", and "full".
#' @param monadic logical. If FALSE (default), returns dyadic data. If TRUE, returns monadic actor1 data.
#' @param ... string. Any additional parameters that users would like to add to their API calls (e.g. interaction or ISO). They are pasted verbatim into the request URL.
#' @param acled_access logical. If TRUE (default), you have used the acled_access function and the email and key arguments are not required.
#' @param log logical. If TRUE, it provides a dataframe with the countries and days requested, and how many calls it entails. The dataframe is provided INSTEAD of the normal ACLED dataset.
#' @param prompt logical. If TRUE (default), users will receive an interactive prompt providing information about their call (countries requested, number of estimated events, and number of API calls required) and asking if they want to proceed with the call. If FALSE, the call continues without warning, but the call is split and returns a message specifying how many calls are being made.
#' @returns Returns a tibble of ACLED events.
#' @family API and Access
#' @seealso
#' \itemize{
#' \item \href{https://acleddata.com/download/35306/}{ACLED API guide}
#' }
#' @examples
#' \dontrun{
#'
#' # Get all the events coded by ACLED in Argentina from 01/01/2022 until 02/01/2022
#' # in dyadic-wide form
#' argen_acled <- acled_api(
#'   email = "your_email", key = "your_key",
#'   country = "Argentina", start_date = "2022-01-01", end_date = "2022-02-01",
#'   acled_access = FALSE
#' )
#'
#' # tibble with all the events from Argentina where each row is one event.
#' argen_acled
#'
#' # Get all events coded by ACLED in the Caribbean from 01/01/2022 to 10/01/2022
#' # in monadic-long form using email and key saved in environment
#'
#' acled_access(email = "your_email", key = "your_key")
#' carib_acled <- acled_api(
#'   regions = "Caribbean", start_date = "2022-01-01",
#'   end_date = "2022-01-10", monadic = TRUE, acled_access = TRUE
#' )
#'
#' ## Tibble with all the events from the Caribbean where each row is one actor
#' carib_acled
#' }
#' @md
#' @import httr
#' @import dplyr
#' @import stringr
#' @import purrr
#' @import lubridate
#' @importFrom rlang .data
#' @importFrom utils menu
#' @importFrom methods hasArg
#' @export

acled_api <- function(email = NULL,
                      key = NULL,
                      country = NULL,
                      regions = NULL,
                      start_date = floor_date(Sys.Date(), "year") - years(1),
                      end_date = Sys.Date(),
                      timestamp = NULL,
                      event_types = NULL,
                      population = "none",
                      monadic = FALSE,
                      ...,
                      acled_access = TRUE,
                      prompt = TRUE,
                      log = FALSE) {
  # Acled Access and credentials ----

  if (isTRUE(acled_access) && (is.null(email) || is.null(key))) {
    # Access is true, and credentials are null: read them from the environment
    email <- Sys.getenv("acled_email")
    key <- Sys.getenv("acled_key")
    if (nchar(email) <= 1 || nchar(key) <= 1) {
      stop("Error in credentials: \n acled_access is TRUE, but email and/or key are not stored in the enviornment. Please rerun acled_access or include key and email in function")
    }
  } else if (isTRUE(acled_access) && (!is.null(email) || !is.null(key))) {
    message("acled_access is TRUE, but email and key are included in the function. Ignoring acled_access.")
  }


  # Stoppers for typos ----
  # hasArg() captures its argument unevaluated, so these checks must be spelled
  # out one by one (and in this frame) rather than looped over.

  if (hasArg("Country")) {
    stop("Country is not a valid option. Please utilize \"country\", without capitalizing ")
  }

  if (hasArg("Region")) {
    stop("Region is not a valid option. Please utilize \"regions\"")
  }

  if (hasArg("Regions")) {
    stop("Regions is not a valid option. Please utilize \"regions\", without capitalizing")
  }

  if (hasArg("Event_type")) {
    stop("Event type is not a valid option. Please utilize \"event_types\", without capitalizing")
  }

  if (hasArg("Start_date")) {
    stop("Start_date is not a valid option. Please utilize \"start_date\", without capitalizing")
  }

  if (hasArg("End_date")) {
    stop("End_date is not a valid option. Please utilize \"end_date\", without capitalizing")
  }

  if (!population %in% c("none", "best", "full")) {
    stop("The 'population' argument must be one of 'none', 'best', or 'full'.")
  }

  # Error checks for arguments ----

  if (!is.character(email) || is.null(email) || (is.character(email) && nchar(email) < 3)) {
    stop("Email address required for ACLED API access. 'email' must be a character string (e.g., 'name@mail.com') or a call to where your email address is located if stored as an environment variable (e.g., Sys.getenv('acled_email'). Register your email for access at https://developer.acleddata.com.")
  }
  email_internal <- paste0("&email=", email)

  if (!is.character(key) || is.null(key) || key == "") {
    stop("Key required for ACLED API access. 'key' must be a character string (e.g., 'xyz123!etc') or a call to where your ACLED API key is located if stored as an environment variable (e.g., Sys.getenv('acled_key'). Request and locate your ACLED API key at https://developer.acleddata.com.")
  }
  key_internal <- paste0("&key=", key)

  if (!is.null(country) && !all(unique(country) %in% acledR::acled_countries[["country"]])) {
    stop("One or more of the requested countries are not in ACLED's countries list. The full list of countries is available at 'acledR::acled_countries")
  }

  # Checking if regions are input incorrectly ----
  if (is.character(regions) && !all(unique(regions) %in% acledR::acled_regions[["region_name"]])) {
    stop("One or more requested region names not in the ACLED country list. The full list of ACLED regions is available at 'acledR::acled_regions'.")
  }
  if (is.numeric(regions) && !all(unique(regions) %in% acledR::acled_regions[["region"]])) {
    stop("One or more requested region numbers not in the ACLED country list. The full list of ACLED regions is available at 'acledR::acled_regions'.")
  }

  # Dates ----
  # Both dates have non-NULL defaults, but an explicit NULL is still accepted
  # and widened to the full window so every later step can rely on real dates.
  if (is.null(start_date)) {
    start_date <- "1997-01-01"
  }
  if (is.null(end_date)) {
    end_date <- Sys.Date()
  }

  # Required components ----
  base_url <- "https://api.acleddata.com/acled/read.csv?"

  # Calculate country days ----

  # Region codes are translated to region names, which is what the `region`
  # column of acled_countries is filtered on below.
  if (is.numeric(regions)) {
    regions <- acledR::acled_regions %>%
      filter(.data$region %in% regions) %>%
      pull(.data$region_name)
  }

  # Local copy so the argument is not shadowed by the `country` column inside
  # filter().
  requested_countries <- country

  # Setup base data to check how many country-days are being requested
  df <- acledR::acled_countries
  if (!is.null(country) && !is.null(regions)) {
    df <- df %>%
      filter((.data$country %in% requested_countries) | (.data$region %in% regions))
  } else if (!is.null(country)) {
    df <- df %>%
      filter(.data$country %in% requested_countries)
  } else if (!is.null(regions)) {
    df <- df %>%
      filter(.data$region %in% regions)
  }

  # Subset acled_multipliers (subset is faster than filter in our case) by relevant country & year
  ex1_df <- subset(acledR::acled_multipliers, country %in% unique(df$country), select = country:avg_month_bin)
  ex1_df <- subset(ex1_df, year <= lubridate::year(end_date) & year >= lubridate::year(start_date))

  # Number of requested days falling in each calendar year of [sd, ed],
  # returned as a numeric vector named by year (both end points inclusive).
  days_per_year <- function(sd, ed) {
    start <- as.Date(sd)
    end <- as.Date(ed)

    years <- seq(year(start), year(end))

    days_in_each_year <- vapply(years, function(y) {
      start_of_year <- as.Date(paste0(y, "-01-01"))
      end_of_year <- as.Date(paste0(y, "-12-31"))

      # Clip the calendar year to the requested window
      as.numeric(min(end_of_year, end) - max(start_of_year, start)) + 1
    }, numeric(1))

    names(days_in_each_year) <- years
    days_in_each_year
  }

  days_requested <- days_per_year(start_date, end_date)

  ex1_df <- ex1_df %>%
    mutate(
      # Add n_days_requested based of the days_per_year result
      n_days = days_requested[as.character(year)],
      # Divide avg_month_bins into days, because not every call will be about months
      avg_daily_bin = avg_month_bin / 30,
      # Multiply the avg_daily_bin with the number of days
      ee_events = avg_daily_bin * n_days
    )


  out <- df %>%
    mutate(
      t_start = lubridate::as_date(start_date),
      t_end = lubridate::as_date(end_date),
      t_start = case_when(
        as.numeric(lubridate::year(t_start)) < start_year ~ lubridate::as_date(paste0(start_year, "-01-01")),
        TRUE ~ t_start
      ),
      time = .data$t_end - .data$t_start
    )

  # Note for how much data is being requested
  size_note <- paste(
    "Requesting data for",
    length(unique(ex1_df$country)),
    "country.",
    "Accounting for the requested time period and ACLED coverage dates, this request includes approximately",
    format(acled_rounding(sum(ex1_df$ee_events)), big.mark = ","), "events."
  )

  message(size_note)

  if (str_detect(as.character(end_date), "2024") || str_detect(as.character(start_date), "2024")) {
    message("Your request appears include dates in 2024. Please note that estimates of 2024 are based on 2023 estimates. ")
  }


  # Current ceiling 400k events per call. At least one call is always needed,
  # even when the estimate is zero.
  time_units <- max(1, ceiling(sum(ex1_df$ee_events) / 400000))

  # Split call into roughly equally sized groups depending on how many country-days are in each country
  # This randomly assigns country into bins
  out_groups <- split(out, sample(seq_len(time_units), nrow(out), replace = TRUE))

  if (isTRUE(log)) {
    if (length(out_groups) > 1) {
      log_rep <- map_dfr(out_groups, bind_rows, .id = "id") %>%
        mutate(calls = time_units)
    } else {
      log_rep <- out_groups[[1]]
      log_rep$id <- "1"
      log_rep$calls <- time_units
    }

    log_rep$email <- email
    log_rep$key <- key

    return(log_rep)
  }

  # Dates
  dates_internal <- paste0("&event_date=", paste(start_date, end_date, sep = "|"), "&event_date_where=BETWEEN")

  if (start_date > end_date) {
    stop("Requested 'start_date' is after the requested 'end_date'.")
  }

  # Where
  ## country

  countries_internal <- vector("list", length = length(out_groups))
  for (i in seq_along(out_groups)) {
    countries_internal[[i]] <- paste0("&country=", paste(gsub("\\s{1}", "%20", out_groups[[i]]$country), collapse = ":OR:country="))
    countries_internal[[i]] <- paste0(countries_internal[[i]], "&country_where=%3D")
  }


  # Timestamps
  if (!is.null(timestamp)) {
    # First try to read the timestamp as a yyyy-mm-dd date; anything ymd()
    # warns or errors on is treated as a candidate numeric Unix timestamp.
    timestamp_kind <- tryCatch(
      {
        lubridate::ymd(timestamp)
        "string"
      },
      warning = function(w) "numerical",
      error = function(e) "numerical"
    )

    if (timestamp_kind == "string") {
      timestamp_parsable <- lubridate::ymd(timestamp)
      include_timestamp <- "Yes"
    } else {
      # The handlers return the decision instead of assigning with `<<-`,
      # which could end up writing into the global environment.
      include_timestamp <- tryCatch(
        {
          lubridate::date(lubridate::as_datetime(timestamp))
          "Yes_but_numerical"
        },
        warning = function(w) {
          za <- menu(c("Yes", "No"),
            title = paste0("You indicated a timestamp, but it was not recognized. Reminder: Timestamp only accepts string as yyyy-mm-dd OR a Unix timestamp (integer).", "\n", "\n", "Do you want me to continue and ignore timestamp?")
          )
          if (za != 1) {
            stop("User requested to abort when timestamp was not recognized.")
          }
          "No"
        },
        error = function(e) {
          stop("User requested to abort when timestamp was not recognized.")
        }
      )
    }

    if (include_timestamp == "Yes") {
      # `timestamp_parsable` is a Date, so it is compared with today's Date
      # rather than with a date-time.
      if (timestamp_parsable > Sys.Date()) {
        stop("The timestamp cannot be later than today. Please change the timestamp and try again.")
      }
      timestamp_internal <- paste0("&timestamp=", timestamp_parsable)
    } else if (include_timestamp == "Yes_but_numerical") {
      timestamp_internal <- paste0("&timestamp=", timestamp)
    } else {
      timestamp_internal <- "&timestamp="
    }
  } else {
    timestamp_internal <- "&timestamp="
  }

  # How
  if (isTRUE(monadic)) {
    monadic_internal <- "&export_type=monadic"
  } else {
    monadic_internal <- ""
  }

  # Event types
  if (!is.null(event_types)) {
    event_types <- str_to_upper(event_types)
    if (FALSE %in% unique(event_types %in% str_to_upper(c(
      "Battles", "Violence against civilians", "Protests",
      "Riots", "Strategic Developments", "Explosions/Remote violence"
    )))) {
      print(str_to_title(event_types))

      stop("One or more requested event types are not in the ACLED data. Event types include: Battles, Violence against civilians, Protests, Riots, Strategic Developments, and Explosions/Remote violence. Leave 'event_type = NULL' to request all event types from the API. ")
    }

    event_types_internal <- paste0("&event_type=", paste(gsub("\\s{1}", "%20", event_types), collapse = ":OR:event_type="))
  } else {
    event_types_internal <- ""
  }


  # Interactive choice for users after prompting how many calls are required - I am nocov this one because of discrepancy between
  # covr, devtools and testthat. After testing with testthat and devtools::test() it shows that it works. But covr seems to fail.

  if (isTRUE(prompt)) { # nocov start

    message(paste0(
      "This request requires ",
      time_units,
      " API calls. Do you want to proceed with this request?\nIf you need to increase your API quota, please contact access@acleddata.com"
    ))

    if (interactive()) {
      user_input <- menu(title = "Proceed? (Yes/No)", choices = c("Yes", "No"))
      if (user_input == 2) {
        stop('User responded "No" when prompted about the number of API calls required. \nIf you need to increase your API quota, please contact access@acleddata.com',
          call. = FALSE
        )
      } else {
        message(
          "Proceeding with ",
          time_units,
          " API calls"
        )
      }
    }
  } # nocov end

  # Population argument (already validated above)
  population_internal <- switch(population,
    none = "",
    best = "&population=true",
    full = "&population=full"
  )

  # Loop through country bins to define each api call
  url_internal <- vector("list", length = length(out_groups))
  for (i in seq_along(out_groups)) {
    url_internal[[i]] <- paste0(
      base_url, monadic_internal,
      email_internal, key_internal,
      countries_internal[[i]],
      dates_internal, timestamp_internal,
      event_types_internal, population_internal, ..., "&limit=0"
    )
  }


  # Loop through the api requests
  response <- vector("list", length = length(out_groups))
  message("Processing API request")
  for (i in seq_along(out_groups)) {
    response[[i]] <- httr::GET(url_internal[[i]])

    if (response[[i]][["status_code"]] == 500) {
      stop(paste0("API request unsuccessful with status code ", response[[i]][["status_code"]], ". \n", rlang::format_error_bullets(c("Make sure you have not execeeded your API calls (2/year for a standard account)", "Verify your API credentials (key and email)", "If nothing works contact us through GitHub Issues or at access@acleddata.com."))))
    } else if (response[[i]][["status_code"]] == 503 || response[[i]][["status_code"]] == 502) {
      stop(paste0("API request unsuccessful with status code ", response[[i]][["status_code"]], ". \n", "Our server may be under maintenance or it may momentarily be unavailable; please try again in a couple of minutes."))
    }
  }

  # Map through each get request to convert to one tibble
  message("Extracting content from API request")
  out <- suppressMessages(purrr::map_df(
    .x = response,
    ~ content(.x)
  ))


  return(out)
}
# diff --git a/R/acled_deletions_api.R b/R/acled_deletions_api.R new file mode 100755 index 0000000..b6e3648 --- /dev/null +++ b/R/acled_deletions_api.R @@ -0,0 +1,88 @@
#' @title Request data from the ACLED Deletions API
#' @name acled_deletions_api
#' @description This function allows users to pull deleted ACLED event IDs from the Deletions API.
#' @param email character string. Email associated with your ACLED account registered at <https://developer.acleddata.com>.
#' @param key character string. Access key associated with your ACLED account registered at <https://developer.acleddata.com>.
#' @param date_deleted character string. Format 'yyyy-mm-dd' or Unix timestamp. The query will return all deleted events including and after the requested date/timestamp.
#' @param acled_access logical. If TRUE it means that you have utilized the acled_access function and there is no need for the email and key arguments.
#' @param log Only for testing purposes: you can use this to check if all the variables in your call were handled properly.
#' @returns Returns a tibble of ACLED data with columns for event_id_cnty and deleted_timestamp.
#' @title Request data from the ACLED Deletions API
#' @name acled_deletions_api
#' @description This function allows users to pull deleted ACLED event IDs from the Deletions API.
#' @param email character string. Email associated with your ACLED account registered at <https://developer.acleddata.com>.
#' @param key character string. Access key associated with your ACLED account registered at <https://developer.acleddata.com>.
#' @param date_deleted character string. Format 'yyyy-mm-dd' or Unix timestamp. The query will return all deleted events including and after the requested date/timestamp.
#' @param acled_access logical. If TRUE and `email` or `key` is missing, both credentials are read from the `acled_email` and `acled_key` environment variables (as stored by the acled_access function). If both `email` and `key` are supplied they are used as given.
#' @param log Only for testing purposes: if TRUE, no request is sent and a tibble with the `email`, `key` and `date_deleted` values that would have been used is returned instead.
#' @returns Returns a tibble of ACLED data with columns for event_id_cnty and deleted_timestamp.
#' @family API and Access
#' @seealso
#' \itemize{
#' \item \href{https://acleddata.com/download/35306/}{ACLED API guide}
#' \item \href{https://acleddata.com/download/35179/}{Keeping ACLED data up to date guide}
#' }
#' @examples
#' \dontrun{
#'
#' # Request deleted ACLED events since January 1, 2022
#' acled_deletions_api(date_deleted = "2022-01-01", acled_access = TRUE)
#' }
#' @md
#' @import httr
#'
#' @export
acled_deletions_api <- function(email = NULL,
                                key = NULL,
                                date_deleted = NULL,
                                acled_access = TRUE, log = FALSE) {
  # Resolve credentials. With acled_access on, a missing email or key means
  # both are taken from the environment; explicit credentials win otherwise.
  if (acled_access %in% TRUE) {
    if (is.null(email) || is.null(key)) {
      email <- Sys.getenv("acled_email")
      key <- Sys.getenv("acled_key")
      if (nchar(email) <= 1 || nchar(key) <= 1) {
        stop("Error in credentials: \n acled_access is TRUE, but email and/or key are not stored in the enviornment. Please rerun acled_access or include key and email in function")
      }
    } else {
      message("acled_access is TRUE, but email and key are included in the function. Ignoring acled_access.")
    }
  }

  # Required components
  base_url <- "https://api.acleddata.com/deleted/read.csv?"

  if (!is.character(email) || is.null(email) || email == "") {
    stop("Email address required for ACLED API access. 'email' must be a character string (e.g., 'name@mail.com') or a call to where your email address is located if stored as an environment variable (e.g., Sys.getenv('email_adress'). Register your email for access at https://developer.acleddata.com.")
  }
  email_internal <- paste0("&email=", email)

  if (!is.character(key) || is.null(key) || key == "") {
    stop("Key required for ACLED API access. 'key' must be a character string (e.g., 'xyz123!etc') or a call to where your ACLED API key is located if stored as an environment variable (e.g., Sys.getenv('acled_key'). Request and locate your ACLED API key at https://developer.acleddata.com.")
  }
  key_internal <- paste0("&key=", key)

  # When: only add the timestamp filter if a date was requested
  dates_internal <- if (is.null(date_deleted)) {
    ""
  } else {
    paste0("&deleted_timestamp=", date_deleted)
  }

  url <- paste0(
    base_url,
    email_internal,
    key_internal,
    dates_internal,
    "&limit=0"
  )

  # Testing hook: report the resolved inputs instead of calling the API
  if (log == TRUE) {
    return(tibble(email = email, key = key, date_deleted = date_deleted))
  }

  response <- httr::GET(url)
  status <- response[["status_code"]]

  if (status == 500) {
    stop(paste0("API request unsuccessful with status code ", status, ". \n", rlang::format_error_bullets(c("Make sure you have not execeeded your API calls (2/year for a standard account)", "Verify your API credentials (key and email)", "If nothing works contact us through GitHub Issues or at access@acleddata.com."))))
  } else if (status %in% c(502, 503)) {
    stop(paste0("API request unsuccessful with status code ", status, ". \n", "Our server may be under maintenance or it may momentarily be unavailable; please try again in a couple of minutes."))
  }

  # content() reports how it parsed the body through messages; silence them
  suppressMessages(
    httr::content(response)
  )
}
#' @title Rounding function
#' @name acled_rounding
#' @description This function addresses some of the conflicts of rounding in R, especially when trying to round up.
#' @param num numeric vector. The number(s) to round.
#' @param digits int. Where do we want to round up. It accepts 0 (whole number), 1 (tenths place), 2 (hundredths place), etc.
#' @returns A numeric vector the same length as `num`.
#' @family Helpers
#' @details
#' This function is meant to address the problem of rounding in R where the approach is always round to even. The function is meant to round things following the simple rule. If the decimal is 5+ then round up, if not round down. With the 'digits' argument, one can set up the specificity of the rounding, 0 = whole number, 1 = tenths place, 2 = hundredths place, and so on.
#'
#' When the digit right after the rounding position is exactly 5 the value is rounded with `ceiling()`, so negative halves move towards zero (e.g. -2.5 becomes -2). `NA` inputs yield `NA`.
#' @examples
#' x1 <- 1.569
#' x2 <- 104.530
#' x3 <- 54.430
#' x4 <- 205.49999
#' acled_rounding(x1)
#' acled_rounding(x2)
#' acled_rounding(x3)
#' acled_rounding(x4)
#' acled_rounding(c(0.5, 1.5, 2.5))
#' @md
#' @export
acled_rounding <- function(num, digits = 0) {
  num <- as.numeric(num)
  digits <- as.numeric(digits)

  accuracy <- 1 / (10^digits)

  # Digit immediately to the right of the last position that is kept
  key_digit <- trunc(num * 10^(digits + 1)) %% 10

  # Choose between the two rounded *values* element by element. Choosing
  # between the two *functions* with ifelse() only works for a length-1 test,
  # which is why vector input used to fail.
  scaled <- num / accuracy
  ifelse(key_digit == 5, ceiling(scaled), round(scaled)) * accuracy
}
#' @title Change interaction codes from numeric labels to string labels
#' @name acled_transform_interaction
#' @description This function allows users to change from numeric interaction codes (i.e. 1, 2, 3, etc) to string interaction codes (i.e. State Forces, Rebel Group, etc.)
#' @param df dataframe. ACLED data including the inter1, inter2 and interaction columns, plus assoc_actor_1 and assoc_actor_2 (the relabelled columns are placed right after them).
#' @param only_inters boolean. If TRUE, only *inter1* and *inter2* are transformed. If FALSE (default), the *interaction* column is transformed as well.
#' @returns Returns the ACLED events with modified *inter1*, *inter2* and potentially *interaction* columns.
#' @family Data Manipulation
#' @examples
#' \dontrun{
#'
#' # Load data frame
#' argen_acled <- acled_api(
#'   email = "your_email", key = "your_key",
#'   country = "Argentina", start_date = "2022-01-01", end_date = "2022-02-01",
#'   acled_access = FALSE
#' )
#'
#' # Transform the interactions
#' argen_acled_transformed <- acled_transform_interaction(argen_acled, only_inters = FALSE)
#' }
#' @md
#' @importFrom dplyr left_join
#' @importFrom dplyr select
#' @importFrom dplyr rename
#' @importFrom dplyr relocate
#' @importFrom dplyr mutate
#' @importFrom stringr str_detect
#' @importFrom stringr str_replace
#' @importFrom stringr str_replace_all
#' @export
#'
acled_transform_interaction <- function(df, only_inters = FALSE) {
  for (required in c("inter1", "inter2", "interaction")) {
    if (!required %in% colnames(df)) {
      stop("The input dataframe does not contain '", required, "' column. Please utilize a dataframe that has ACLED's column structure for the function to succeed.")
    }
  }

  # Every inter1/inter2 code has to sit in 0..8. Each value is tested on its
  # own: the previous check took max()/min() of already-compared logicals and
  # let a single out-of-range code through. Missing codes are ignored here.
  codes <- c(df$inter1, df$inter2)
  if (any(codes > 8 | codes < 0, na.rm = TRUE)) {
    stop("One or more interaction codes were not recognized. Please remember interaction codes are positive integers from 1 to 8. ")
  }

  # Swap each numeric inter column for its label and keep it next to the
  # associated-actor column of the same side.
  labelled <- df %>%
    left_join(acledR::acled_interaction_codes, by = c("inter1" = "Numeric Code")) %>%
    select(-inter1) %>%
    rename(inter1 = "Inter1/Inter2") %>%
    relocate(inter1, .after = assoc_actor_1) %>%
    left_join(acledR::acled_interaction_codes, by = c("inter2" = "Numeric Code")) %>%
    select(-inter2) %>%
    rename(inter2 = "Inter1/Inter2") %>%
    relocate(inter2, .after = assoc_actor_2)

  if (only_inters == FALSE) {
    actor_labels <- c(
      "1" = "State Forces",
      "2" = "Rebel Groups",
      "3" = "Political Militias",
      "4" = "Identity Militias",
      "5" = "Rioters",
      "6" = "Protesters",
      "7" = "Civilians",
      "8" = "External/Other Forces"
    )

    labelled <- labelled %>%
      mutate(interaction = dplyr::case_when(
        str_detect(interaction, "10") ~ "Sole State Forces",
        str_detect(interaction, "20") ~ "Sole Rebel Groups",
        str_detect(interaction, "30") ~ "Sole Political Militias",
        str_detect(interaction, "40") ~ "Sole Identity Militias",
        str_detect(interaction, "50") ~ "Sole Rioters",
        str_detect(interaction, "60") ~ "Sole Protesters",
        str_detect(interaction, "70") ~ "Sole Civilians",
        str_detect(interaction, "80") ~ "Sole Others",
        TRUE ~ as.character(interaction)
      )) %>%
      mutate(
        # "12" -> "1-2"
        interaction = str_replace_all(interaction, "(\\d)(\\d)", "\\1-\\2"),
        # Replace *every* digit: with first-match replacement a same-type
        # pair such as "1-1" ended up as "State Forces-1".
        interaction = str_replace_all(interaction, actor_labels)
      )
  }

  labelled
}
#' @title Transform ACLED data from wide to long
#' @name acled_transform_longer
#' @description Function to convert your ACLED's API calls (if dyadic) into desired monadic forms.
#' @param data dataframe or tibble containing your dataset.
#' @param type character string. One of four types: full_actors, main_actors, assoc_actors, or source.
#' \itemize{
#' \item full_actors: All actor and associated actor columns
#' \item main_actors: Actor 1 and Actor 2 columns
#' \item assoc_actors: All associated actor columns
#' \item source: The source column becomes monadic
#' }
#' @return A tibble with the data transformed into long form.
#' @family Data Manipulation
#' @examples
#' \dontrun{
#' argen_acled <- acled_api(
#'   country = "Argentina", start_date = "2022-01-01",
#'   end_date = "2022-02-01", acled_access = TRUE, prompt = FALSE
#' )
#'
#' # Transforming the data
#' argen_acled_long_actors <- acled_transform_longer(argen_acled, type = "full_actors")
#'
#' nrow(argen_acled_long_actors) # Number of rows in the long form
#' nrow(argen_acled) # Number of rows in the wide form
#' }
#' @md
#' @export
#' @importFrom rlang .data
#' @importFrom dplyr relocate
#' @importFrom dplyr mutate
#' @importFrom dplyr filter
#' @importFrom tidyr pivot_longer
#' @importFrom tidyr separate_rows
#' @importFrom stringr str_trim
#'
acled_transform_longer <- function(data, type = "full_actors") {
  # Reject unknown types up front; otherwise no branch below runs and the
  # function dies on an undefined result object.
  valid_types <- c("full_actors", "main_actors", "assoc_actors", "source")
  if (length(type) != 1 || !(type %in% valid_types)) {
    stop(paste0(
      "Error: ", paste(type, collapse = ", "),
      " is not a valid option. Please select a valid option: ",
      paste0("\"", valid_types, "\"", collapse = ", ")
    ))
  }

  # Stop with a message that names exactly the columns this `type` needs.
  require_columns <- function(cols) {
    if (!all(cols %in% names(data))) {
      stop(paste0(
        "Some columns are missing. Please make sure your data frame includes: ",
        paste(cols, collapse = ", "), "."
      ))
    }
  }

  # actor1/actor2 must not hold ";"-separated lists.
  require_single_actors <- function() {
    if (any(grepl("[;]", data$actor1))) {
      stop("Your actor1 column seems to include more than one result per row. That is inconsistent with our column structure.")
    } else if (any(grepl("[;]", data$actor2))) {
      stop("Your actor2 column seems to include more than one result per row. That is inconsistent with our column structure.")
    }
  }

  # Last character of each string: "1" or "2" for the actor/inter column names.
  last_char <- function(x) {
    stringr::str_sub(x, start = nchar(x))
  }

  actor_columns <- c("actor1", "actor2", "assoc_actor_1", "assoc_actor_2", "sub_event_type")

  if (type == "full_actors") { ## pivot + separate into rows all actor columns
    require_columns(c(actor_columns, "inter1", "inter2"))
    require_single_actors()

    separated_data <- data %>%
      pivot_longer(cols = c("actor1", "actor2", "assoc_actor_1", "assoc_actor_2"), names_to = "type_of_actor", values_to = "actor") %>%
      separate_rows(actor, sep = ";") %>%
      relocate(c("type_of_actor", "actor"), .after = "sub_event_type") %>%
      mutate(actor = str_trim(actor)) %>%
      pivot_longer(cols = c("inter1", "inter2"), names_to = "inter_type", values_to = "inter") %>%
      # Keep only the inter that belongs to the same side (1 or 2) as the actor
      filter(last_char(type_of_actor) == last_char(inter_type)) %>%
      relocate(c("inter_type", "inter"), .after = "actor") %>%
      # Removing inters when the actor is an assoc_actor_1/2
      mutate(inter = dplyr::case_when(
        stringr::str_detect(type_of_actor, "assoc_*") ~ NA,
        TRUE ~ inter
      ))

    # Rows with an empty actor are kept; the caller is only warned about them
    if (0 %in% nchar(separated_data$actor)) {
      warning("There are empty rows in the actor column.")
    }
  } else if (type == "main_actors") { ## only pivot actor columns
    require_columns(c(actor_columns, "inter1", "inter2"))
    require_single_actors()

    separated_data <- data %>%
      pivot_longer(cols = c("actor1", "actor2"), names_to = "type_of_actor", values_to = "actor") %>%
      filter(actor != "") %>%
      relocate(c("type_of_actor", "actor"), .after = "sub_event_type") %>%
      mutate(actor = str_trim(actor)) %>%
      pivot_longer(cols = c("inter1", "inter2"), names_to = "inter_type", values_to = "inter") %>%
      filter(last_char(type_of_actor) == last_char(inter_type)) %>%
      relocate(c("inter_type", "inter"), .after = "actor")
  } else if (type == "assoc_actors") { ## pivot + separate all assoc actor columns
    require_columns(actor_columns)

    separated_data <- data %>%
      pivot_longer(cols = c("assoc_actor_1", "assoc_actor_2"), names_to = "type_of_assoc_actor", values_to = "assoc_actor") %>%
      separate_rows(assoc_actor, sep = ";") %>%
      relocate(c("type_of_assoc_actor", "assoc_actor"), .after = "sub_event_type") %>%
      mutate(assoc_actor = str_trim(assoc_actor))

    if (0 %in% nchar(separated_data$assoc_actor)) {
      warning("There are empty rows in the assoc_actor column.")
    }
  } else { ## source -> pivot + separate source column
    require_columns(c("source_scale", "source"))

    separated_data <- data %>%
      separate_rows(source, sep = ";") %>%
      mutate(source = str_trim(source, side = "both")) %>%
      relocate(source, .before = "source_scale")
  }

  separated_data
}
#' @title Reverse Transform ACLED Data from Long to Wide
#' @name acled_transform_wider
#' @description Function to convert your ACLED's API calls (if monadic) back into the original dyadic forms.
#' @param data a dataframe or tibble containing your dataset.
#' @param type a character string. One of five types: full_actors, main_actors, assoc_actors, source, or api_monadic.
#' \itemize{
#' \item full_actors: All actor and associated actor columns
#' \item main_actors: Actor 1 and Actor 2 columns
#' \item assoc_actors: All associated actor columns
#' \item source: The source column becomes dyadic
#' \item api_monadic: Use this option for data that is the output of the API's monadic option.
#' }
#' @return A tibble with the data transformed back into wide form.
#' @family Data Manipulation
#' @examples
#' \dontrun{
#' argen_acled <- acled_api(
#'   country = "Argentina", start_date = "2022-01-01",
#'   end_date = "2022-02-01", acled_access = TRUE, prompt = FALSE
#' )
#'
#' # Transforming the data to long form
#' argen_acled_long_actors <- acled_transform_longer(argen_acled, type = "full_actors")
#'
#' # Transforming the data back to wide form
#' argen_acled_wide <- acled_transform_wider(argen_acled_long_actors, type = "full_actors")
#'
#' nrow(argen_acled_wide) # Number of rows in the wide form
#' nrow(argen_acled_long_actors) # Number of rows in the long form
#' }
#' @md
#' @export
#' @importFrom rlang .data
#' @importFrom tidyr pivot_wider replace_na
#' @importFrom stringr str_c str_trim
acled_transform_wider <- function(data, type = "full_actors") {
  if (!(type %in% c("full_actors", "main_actors", "assoc_actors", "source", "api_monadic"))) {
    stop(paste0("Error: ", type, " is not a valid option. Please select a valid option:\"full_actors\", \"main_actors\", \"assoc_actors\", \"source\", \"api_monadic\""))
  }

  # Stop unless every column in `cols` exists in `data`; `listing` is the
  # human-readable column list that ends the error message.
  require_columns <- function(cols, listing) {
    if (!all(cols %in% names(data))) {
      stop(paste0("Some columns are missing. Please make sure your data frame includes: ", listing))
    }
  }

  # Joint actors (e.g. several associated actors) collapse into "a; b".
  flatten_actors <- function(x) {
    stringr::str_flatten(x, collapse = "; ")
  }

  # Spread the actor and inter columns back out. Inters that do not apply to
  # a row are filled with 9999 so they can be stripped once rows are merged.
  widen_actor_rows <- function(long) {
    long %>%
      pivot_wider(names_from = type_of_actor, values_from = actor, values_fn = flatten_actors, values_fill = "") %>%
      pivot_wider(names_from = inter_type, values_from = inter, values_fill = 9999) %>%
      # Inters become character so they can be pasted together per event
      mutate(
        inter1 = replace_na(as.character(inter1), ""),
        inter2 = replace_na(as.character(inter2), "")
      )
  }

  # Paste the inter values of one event together and drop the fill markers.
  collapse_inter <- function(x, drop_pattern) {
    str_trim(stringr::str_remove_all(str_c(x, collapse = " "), drop_pattern))
  }

  # Blank strings back to NA, missing inter codes to 0, then the column order
  # of the reference ACLED dataset.
  restore_acled_layout <- function(wide) {
    wide %>%
      mutate(
        actor2 = dplyr::na_if(actor2, ""),
        actor1 = dplyr::na_if(actor1, ""),
        assoc_actor_1 = dplyr::na_if(assoc_actor_1, ""),
        assoc_actor_2 = dplyr::na_if(assoc_actor_2, ""),
        inter1 = replace_na(inter1, 0),
        inter2 = replace_na(inter2, 0)
      ) %>%
      select(names(acledR::acled_old_dummy))
  }

  actor_long_cols <- c("actor", "type_of_actor", "inter_type", "inter")

  if (type == "full_actors") {
    require_columns(actor_long_cols, "actor,type_of_actor,inter_type, and inter.")

    reverse_data <- data %>%
      widen_actor_rows() %>%
      dplyr::group_by(dplyr::across(c(-actor1, -actor2, -inter1, -inter2, -assoc_actor_1, -assoc_actor_2))) %>%
      # Collapse repeated inters and actors into one row per event
      dplyr::summarise(
        actor1 = str_c(actor1, collapse = ""),
        actor2 = str_c(actor2, collapse = ""),
        inter1 = collapse_inter(inter1, "9999|\\s0\\s"),
        inter2 = collapse_inter(inter2, "9999|\\s0\\s"),
        assoc_actor_1 = str_c(assoc_actor_1, collapse = ""),
        assoc_actor_2 = str_c(assoc_actor_2, collapse = ""),
        .groups = "drop"
      ) %>%
      mutate(
        inter1 = as.numeric(inter1),
        inter2 = as.numeric(inter2)
      ) %>%
      restore_acled_layout()
  } else if (type == "main_actors") {
    require_columns(actor_long_cols, "actor,type_of_actor,inter_type, and inter.")

    reverse_data <- data %>%
      widen_actor_rows() %>%
      # assoc_actor_1/2 were never pivoted for this type, so they stay in the
      # grouping key
      dplyr::group_by(dplyr::across(c(-actor1, -actor2, -inter1, -inter2))) %>%
      dplyr::summarise(
        actor1 = str_c(actor1, collapse = ""),
        actor2 = str_c(actor2, collapse = ""),
        inter1 = collapse_inter(inter1, "9999"),
        inter2 = collapse_inter(inter2, "9999"),
        .groups = "drop"
      ) %>%
      mutate(
        inter1 = as.numeric(inter1),
        inter2 = as.numeric(inter2)
      ) %>%
      restore_acled_layout()
  } else if (type == "assoc_actors") {
    require_columns(c("assoc_actor", "type_of_assoc_actor"), "assoc_actor,type_of_assoc_actor.")

    reverse_data <- data %>%
      pivot_wider(names_from = type_of_assoc_actor, values_from = assoc_actor, values_fn = flatten_actors, values_fill = "") %>%
      dplyr::group_by(dplyr::across(c(-assoc_actor_1, -assoc_actor_2))) %>%
      dplyr::summarise(
        assoc_actor_1 = str_c(assoc_actor_1, collapse = ""),
        assoc_actor_2 = str_c(assoc_actor_2, collapse = ""),
        .groups = "drop"
      ) %>%
      restore_acled_layout()
  } else if (type == "source") {
    require_columns("source", "source")

    reverse_data <- data %>%
      dplyr::group_by(dplyr::across(c(-source))) %>%
      # One row per event again, sources joined with "; "
      dplyr::summarise(source = str_c(source, collapse = "; "), .groups = "drop") %>%
      # Match column structure for an acled dataset
      select(names(acledR::acled_old_dummy))
  } else { # api_monadic
    # The first row of each event supplies the actor1 side as-is ...
    first_rows <- data %>%
      dplyr::group_by(event_id_cnty) %>%
      dplyr::slice(1) %>%
      dplyr::ungroup()

    # ... and the second row, when there is one, becomes the actor2 side.
    second_rows <- data %>%
      dplyr::group_by(event_id_cnty) %>%
      dplyr::slice(2) %>%
      dplyr::ungroup() %>%
      rename(
        actor2 = actor1,
        assoc_actor_2 = assoc_actor_1,
        inter2 = inter1
      )

    reverse_data <- first_rows %>%
      left_join(second_rows, by = c(
        "event_id_cnty", "event_date", "year", "time_precision", "disorder_type", "event_type",
        "sub_event_type", "interaction", "civilian_targeting", "iso", "region", "country", "admin1",
        "admin2", "admin3", "location", "latitude", "longitude", "geo_precision", "source", "source_scale",
        "notes", "fatalities", "tags", "timestamp"
      )) %>%
      relocate(c(actor2, assoc_actor_2, inter2), .after = inter1) %>%
      # Events with a single row have no second actor: inter2 becomes 0
      mutate(inter2 = replace_na(inter2, 0)) %>%
      # NOTE(review): admin3 is coerced to logical exactly as before;
      # presumably this mirrors the reference dataset's column type - confirm.
      mutate(admin3 = as.logical(admin3)) %>%
      dplyr::arrange(dplyr::desc(event_id_cnty))
  }

  reverse_data
}
#' @title Updating your ACLED dataset
#' @name acled_update
#' @description
#' This function is meant to help you keep your dataset updated, by automatically checking for new and modified events, as well as deleted events (if deleted = TRUE).
#' Note: The function makes new API calls to gather new and modified events.
#' @param df The dataframe to update, it has to have the same structure as ACLED's dyadic dataframe (i.e. the result of `acled_api()`)
#' @param start_date The first date of events you want to update from. These are the ceiling and floor of *event_date*, not of *timestamp*.
#' @param end_date The last date of events you want to update from. These are the ceiling and floor of *event_date*, not of *timestamp*.
#' @param additional_countries string. Additional countries to update in your dataset. It defaults to "current countries", which includes all the countries inside your dataset.
#' @param regions string. The regions for which you would like events in your dataset updated.
#' @param event_types string. The event types for which you would like events in your dataset updated.
#' @param acled_access logical. If you have already used `acled_access()`, you can set this option as TRUE (default) to avoid having to input your email and access key.
#' @param email character string. Email associated with your ACLED account registered at <https://developer.acleddata.com>.
#' @param key character string. Access key associated with your ACLED account registered at <https://developer.acleddata.com>.
#' @param deleted logical. If TRUE (default), the function will also remove deleted events using acled_deletions_api().
#' @param prompts logical. If TRUE (default), users will receive an interactive prompt providing information about their call (countries requested, number of country-days, and number of API calls required) and asking if they want to proceed with the call. If FALSE, the call continues without warning, but the call is split and returns a message specifying how many calls are being made.
#' @return Tibble with updated ACLED data and a newer timestamp.
#' @family API and Access
#' @seealso
#' \itemize{
#' \item ACLED Keeping your dataset updated guide.
#' }
#' @examples
#' \dontrun{
#' # Updating dataset to include newer data from Argentina
#'
#' acledR::acled_access(email = "your_email", key = "your_key")
#'
#' new_argen_dataset <- acled_update(acledR::acled_old_dummy,
#'   additional_countries = "Argentina",
#'   acled_access = TRUE,
#'   prompts = FALSE
#' )
#' }
#'
#' @md
#' @importFrom dplyr filter
#' @importFrom dplyr anti_join
#' @importFrom methods hasArg
#'
#' @export
acled_update <- function(df,
                         start_date = min(df$event_date),
                         end_date = max(df$event_date),
                         additional_countries = "current countries",
                         regions = NULL,
                         event_types = NULL,
                         acled_access = TRUE,
                         email = NULL,
                         key = NULL,
                         deleted = TRUE,
                         prompts = TRUE) {
  # The input must carry exactly the columns of the reference ACLED dataset.
  reference_columns <- colnames(acledR::acled_old_deletion_dummy)
  if (!setequal(colnames(df), reference_columns)) {
    stop("The data frame provided does not have ACLED's structure. Please make sure the data frame provided follows the same structure.")
  }

  # Warn whenever the requested window differs from the dates already held.
  earliest_event <- min(df$event_date)
  latest_event <- max(df$event_date)

  if (start_date < earliest_event) {
    warning("Warning: Start date is earlier than the earliest event date in your dataframe.")
  }
  if (start_date > earliest_event) {
    warning("Warning: Start date is later than the earliest event date in your dataframe.")
  }
  if (end_date > latest_event) {
    warning("Warning: End date is later than the latest event date in your dataframe.")
  }
  if (end_date < latest_event) {
    warning("Warning: End date is earlier than the latest event date in your dataframe.")
  }

  # Countries already in the data are always refreshed; extra ones are appended.
  countries_in_data <- unique(df$country)
  additional_countries <- if (all(additional_countries == "current countries")) {
    countries_in_data
  } else {
    append(countries_in_data, additional_countries)
  }

  # Check acled_access
  if (!acled_access && (is.null(email) || is.null(key))) {
    stop("Error: If acled_access is FALSE, you must provide an email and key.")
  }

  # Check event_types
  if (!is.null(event_types)) {
    known_event_types <- acledR::acled_event_categories$event_type
    if (!all(event_types %in% known_event_types)) {
      stop("Error: Invalid event_type provided. Please use an event type present in ACLED's methodology.")
    }
  }

  # Error check for additional_countries
  country_known <- additional_countries %in% acledR::acled_countries$country
  if (!all(country_known)) {
    stop(paste(
      "Error: The following additional_countries are not present in acledR::acled_countries:",
      paste(additional_countries[!country_known], collapse = ", ")
    ))
  }

  # Error check for regions
  region_known <- regions %in% acledR::acled_regions$region_name
  if (!all(region_known)) {
    stop(paste(
      "Error: The following regions are not present in acledR::acled_regions:",
      paste(regions[!region_known], collapse = ", ")
    ))
  }

  # Everything newer than the latest timestamp already held is requested.
  max_timestamp <- max(df$timestamp)

  # Drop events reported as deleted since that timestamp (when asked to).
  after_deleted <- df
  if (deleted == TRUE) {
    deleted_events <- acled_deletions_api(email = email, key = key, date_deleted = max_timestamp, acled_access = acled_access)
    after_deleted <- filter(df, !(df$event_id_cnty %in% deleted_events$event_id_cnty))
  }

  new_dataset <- acled_api(
    email = email,
    key = key,
    start_date = start_date,
    end_date = end_date,
    country = additional_countries,
    regions = regions,
    event_types = event_types,
    acled_access = acled_access,
    timestamp = max_timestamp,
    prompt = prompts
  )

  # Modified events replace their old rows; new events are simply appended.
  unchanged_events <- anti_join(after_deleted, new_dataset, by = "event_id_cnty")
  updated_dataset <- rbind(unchanged_events, new_dataset)

  # Summary of what changed; the deletions line only appears when deletions
  # were processed.
  summary_parts <- c(
    "Dataset updated. \n Old number of events: ", nrow(df),
    ". \n New events: ", nrow(updated_dataset) - nrow(df)
  )
  if (deleted == TRUE) {
    summary_parts <- c(summary_parts, ". \n Deleted events: ", nrow(df) - nrow(after_deleted))
  }
  summary_parts <- c(summary_parts, ". \n Total new & modified events: ", nrow(new_dataset))
  message(paste0(summary_parts, collapse = ""))

  return(updated_dataset)
}

#' ACLED Codebook
#'
#' Codebook for ACLED data
#'
#' @family Data
#' @format A data frame:
#' \describe{
#' \item{Variable}{Variable names}
#' \item{Description}{Text description of each variable}
#' \item{Values}{Text description of values for each variable}}
"acled_codebook"

#' ACLED Countries
#'
#' ACLED country names, regions, and coding start year
#'
#' @family Data
#' @format A data frame:
#' \describe{
#' \item{country}{Country names}
#' \item{region}{Region names}
#' \item{start_year}{First year coded by ACLED}}
"acled_countries"

#' ACLED Multipliers
#'
#' A dataframe with additional information for each country, only for the purpose of estimating events.
+#' +#' @family Data +#' @format A data frame: +#' \describe{ +#' \item{country}{Country names} +#' \item{bin}{Bin of event frequency} +#' \item{year}{Year corresponding to the bin} +#' \item{avg_month_bin}{Average monthly of the bin} +#' } +"acled_multipliers" + +#' ACLED Regions +#' +#' ACLED region names, region numbers, and coding start dates +#' +#' @family Data +#' @format A data frame: +#' \describe{ +#' \item{region}{Region number} +#' \item{region_name}{Region names} +#' \item{first_event_date}{First date (yyyy-mm-dd) coded by ACLED}} +"acled_regions" + +#' ACLED Event Categories +#' +#' ACLED event and sub-event types, grouped by category +#' +#' @family Data +#' @format A data frame: +#' \describe{ +#' \item{event_type}{ACLED event type} +#' \item{sub_event_type}{ACLED sub-event type} +#' \item{political_violence}{Dummy indicator for whether sub-event type falls within political violence} +#' \item{organized_political_violence}{Dummy indicator for whether sub-event type falls within organized political violence} +#' \item{disorder}{Dummy indicator for whether sub-event type falls within disorder} +#' \item{demonstrations}{Dummy indicator for whether sub-event type falls within demonstrations}} +"acled_event_categories" + +#' A dummy data frame of ACLED events emulating an old format, used in "Keeping your dataset updated" Vignette +#' +#' Small dataset of events in Argentina, purposefully including events which are currently deleted/modified. 
+#' @family Data +#' @format A data frame: +#' \describe{ +#' \item{event_id_cnty}{An unique individual identifier by number and country acronym (updated annually)} +#' \item{event_date}{The day, month and year on which an event took place} +#' \item{year}{The year in which an event took place} +#' \item{time_precision}{A numeric code indicating the level of certainty of the date coded for the event} +#' \item{disorder_type}{Type of disorder associated with the event and sub event type} +#' \item{event_type}{The type of event} +#' \item{sub_event_type}{The type of sub-event} +#' \item{actor1}{The named actor involved in the event. Note: Actor 1 and Actor 2 do not imply directionality (e.g. attacker or defender)} +#' \item{assoc_actor_1}{The named actor associated with or identifying actor1} +#' \item{inter1}{A numeric code indicating the type of actor1} +#' \item{actor2}{The named actor involved in the event. Note: Actor 1 and Actor 2 do not imply directionality (e.g. attacker or defender)} +#' \item{assoc_actor_2}{The named actor associated with or identifying actor2} +#' \item{inter2}{A numeric code indicating the type of actor2} +#' \item{interaction}{A numeric code indicating the interaction between types of actor1 and actor2} +#' \item{civilian_targeting}{Column referencing the presence of civilian targeting} +#' \item{iso}{A numeric code for each individual country} +#' \item{region}{The region of the world where the event took place} +#' \item{country}{The country in which the event took place} +#' \item{admin1}{The largest sub-national administrative region in which the event took place} +#' \item{admin2}{The second largest sub-national administrative region in which the event took place} +#' \item{admin3}{The third largest sub-national administrative region in which the event took place} +#' \item{location}{The location in which the event took place} +#' \item{latitude}{The latitude of the location} +#' \item{longitude}{The longitude of the location} +#' 
\item{geo_precision}{A numeric code indicating the level of certainty of the location coded for the event} +#' \item{source}{The source of the event report} +#' \item{source_scale}{The scale (local, regional, national, international) of the source} +#' \item{notes}{A short description of the event} +#' \item{fatalities}{The number of reported fatalities which occurred during the event} +#' \item{tags}{Tags associated with the event.} +#' \item{timestamp}{Numeric code of time}} +"acled_old_dummy" + +#' Second dummy data frame of ACLED events emulating an old format, used in acled_deletion_api Vignette +#' +#' Large dataset of multiple regions and countries, purposefully including deleted/modified events. +#' +#' @family Data +#' @format A data frame: +#' \describe{ +#' \item{event_id_cnty}{An unique individual identifier by number and country acronym (updated annually)} +#' \item{event_date}{The day, month and year on which an event took place} +#' \item{year}{The year in which an event took place} +#' \item{time_precision}{A numeric code indicating the level of certainty of the date coded for the event} +#' \item{disorder_type}{Type of disorder associated with the event and sub event type} +#' \item{event_type}{The type of event} +#' \item{sub_event_type}{The type of sub-event} +#' \item{actor1}{The named actor involved in the event. Note: Actor 1 and Actor 2 do not imply directionality (e.g. attacker or defender)} +#' \item{assoc_actor_1}{The named actor associated with or identifying actor1} +#' \item{inter1}{A numeric code indicating the type of actor1} +#' \item{actor2}{The named actor involved in the event. Note: Actor 1 and Actor 2 do not imply directionality (e.g. 
attacker or defender)} +#' \item{assoc_actor_2}{The named actor associated with or identifying actor2} +#' \item{inter2}{A numeric code indicating the type of actor2} +#' \item{interaction}{A numeric code indicating the interaction between types of actor1 and actor2} +#' \item{civilian_targeting}{Column referencing the presence of civilian targeting} +#' \item{iso}{A numeric code for each individual country} +#' \item{region}{The region of the world where the event took place} +#' \item{country}{The country in which the event took place} +#' \item{admin1}{The largest sub-national administrative region in which the event took place} +#' \item{admin2}{The second largest sub-national administrative region in which the event took place} +#' \item{admin3}{The third largest sub-national administrative region in which the event took place} +#' \item{location}{The location in which the event took place} +#' \item{latitude}{The latitude of the location} +#' \item{longitude}{The longitude of the location} +#' \item{geo_precision}{A numeric code indicating the level of certainty of the location coded for the event} +#' \item{source}{The source of the event report} +#' \item{source_scale}{The scale (local, regional, national, international) of the source} +#' \item{notes}{A short description of the event} +#' \item{fatalities}{The number of reported fatalities which occurred during the event} +#' \item{tags}{Tags associated with the event.} +#' \item{timestamp}{Numeric code of time}} +"acled_old_deletion_dummy" + +#' ACLED interaction codes +#' +#' ACLED interaction and actor types +#' +#' @family Data +#' @format A data frame: +#' \describe{ +#' \item{Inter1/Inter2}{Actor type} +#' \item{Numeric Code}{Numeric equivalent found in the inter1 and inter2 column. 
}} +"acled_interaction_codes" diff --git a/R/generate_counts.R b/R/generate_counts.R deleted file mode 100644 index e3a7d2e..0000000 --- a/R/generate_counts.R +++ /dev/null @@ -1,87 +0,0 @@ -#' Generate event counts from ACLED data -#' -#' @param data ACLED data -#' @param event_type Event types to include -#' @param unit_id Unit variable -#' @param time_id Temporal variable -#' @param time_target Target temporal unit -#' @param start_date Earliest date to include -#' @param end_date Latest date to include -#' @param add_unit_ids Option to add in units with no events at certain time periods -#' @return Returns a tibble grouped by unit_id -#' -#' -#' -#' @export -generate_counts <- - function(data, event_type = NULL, unit_id, time_id, time_target, - start_date = NULL, end_date = NULL, - add_unit_ids = NULL) { - - if(!is.null(event_type)) - if(sum(event_type %in% unique(data[["event_type"]])) < length(event_type)) - stop("One or more requested event types not in data.") - - if(is.null(start_date)) - start_date <- min(data[["event_date"]]) - - if(is.null(end_date)) - end_date <- max(data[["event_date"]]) - - - if(min(data[["event_date"]]) > as.Date(end_date)) - stop("Earliest event date in data is after the requested end_date.") - - if(max(data[["event_date"]]) < as.Date(start_date)) - stop("Latest event date in data is before requested start_date.") - - if(min(data[["event_date"]]) > as.Date(start_date)) - warning("Requested start_date is before the earliest date in the data. Returning only dates in the requested range that are within the temporal bounds of the data.") - - if(max(data[["event_date"]]) < as.Date(end_date)) - warning("Requested end_date is after the latest date in the data. 
Returning only dates in the requested range that are within the temporal bounds of the data.") - - - - - all_dates <- seq(floor_date(as.Date(start_date), time_target, - week_start = getOption('lubridate.week.start', 6)), - floor_date(as.Date(end_date), time_target, - week_start = getOption('lubridate.week.start', 6)), - by = time_target) - - - - if(is.null(event_type)) { - filter_types <- unique(data[["event_type"]]) - } else { - filter_types <- event_type - } - - if(!is.null(add_unit_ids)) - add_unit_ids <- unique(data[[unit_id]]) - else(add_unit_ids <- c(unique(data[[unit_id]], add_unit_ids))) - - - data %>% - filter(event_type %in% filter_types) %>% - filter(between(event_date, as.Date(start_date), as.Date(end_date))) %>% - mutate(event_date = ymd(.data[[time_id]]), - event_time = floor_date(event_date, time_target, week_start = getOption('lubridate.week.start', 6))) %>% - - group_by(.data[[unit_id]], event_time) %>% - - summarise(count = n()) %>% - ungroup() %>% - full_join(merge(add_unit_ids, all_dates) %>% - as_tibble() %>% - rename(country = x, event_time = y)) %>% - mutate(count = case_when(is.na(count) ~ as.numeric(0), - TRUE ~ as.numeric(count))) %>% - rename(!!paste0("event_", time_target) := event_time) %>% - suppressMessages() - - - - } - diff --git a/R/generate_movers.R b/R/generate_movers.R deleted file mode 100644 index d90184a..0000000 --- a/R/generate_movers.R +++ /dev/null @@ -1,50 +0,0 @@ -#' Generate event counts from ACLED data -#' -#' @param data ACLED data. -#' @param unit_id Unit variable. -#' @param time_id Temporal variable. -#' @param slide_funs Requested moving statistics. Character vector with options including mean, median, min, and max. -#' @param slide_periods How many periods in the past to summarize over. Vector of one or more integers. Inf includes all previous periods. -#' @param na.rm Whether to include NAs in the calculations. -#' @return Returns a tibble grouped by unit_id. 
-#' -#' -#' -#' @export - - -generate_movers <- - function(data, var, unit_id, time_id, slide_funs, slide_periods, na.rm = T, complete = T) { - - all_funs <- list(mean = function(x) {mean(x, na.rm = na.rm)}, - sd = function(x) {sd(x, na.rm = na.rm)}, - min = function(x) {min(x, na.rm = na.rm)}, - max = function(x) {max(x, na.rm = na.rm)}) - - cross_tbl <- expand_grid(all_funs, slide_periods) %>% - mutate(fun_name = attr(all_funs, "name")) %>% - filter(fun_name %in% slide_funs) - - - data %>% - group_by(.data[[unit_id]]) %>% - arrange(.data[[unit_id]], .data[[time_id]]) %>% - # mutate(var_lag = case_when(isFALSE(include_present) ~ lag(.data[[var]], 1), - # TRUE ~ as.numeric(.data[[var]]))) %>% - - nest() %>% - mutate(moving = map(data, - function(df) { - map2_dfc(.x = cross_tbl$all_funs, - .y = cross_tbl$slide_periods, - ~slider::slide_dbl(.x = df[[var]], .f = .x, .before = .y, - .complete = complete)) %>% - rename_with(., ~paste("moving", attr(cross_tbl$all_funs, "name"), cross_tbl$slide_periods, sep = "_")) - } - ) - ) %>% - - unnest(c(data, moving)) %>% - suppressMessages() - - } diff --git a/R/utils-pipe.R b/R/utils-pipe.R new file mode 100755 index 0000000..fd0b1d1 --- /dev/null +++ b/R/utils-pipe.R @@ -0,0 +1,14 @@ +#' Pipe operator +#' +#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. +#' +#' @name %>% +#' @rdname pipe +#' @keywords internal +#' @export +#' @importFrom magrittr %>% +#' @usage lhs \%>\% rhs +#' @param lhs A value or the magrittr placeholder. +#' @param rhs A function call using the magrittr semantics. +#' @return The result of calling `rhs(lhs)`. 
+NULL diff --git a/README.Rmd b/README.Rmd new file mode 100755 index 0000000..7f52d0a --- /dev/null +++ b/README.Rmd @@ -0,0 +1,74 @@ +--- +output: github_document +--- + + + +```{r, echo = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + fig.path = "man/figures/" +) +``` + +# acledR + + + +[![R-CMD-check](https://github.com/ACLED/acledR/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/ACLED/acledR/actions/workflows/R-CMD-check.yaml) +[![codecov](https://codecov.io/gh/ACLED/acledR/graph/badge.svg?token=TDJodXhEvx)](https://codecov.io/gh/ACLED/acledR) + + +Welcome to ACLED's official R package! With this package we want to bring you an even more convenient way to access ACLED data, building on top of our existing API. + +In this repo you can find the source code of the package, as well as the documentation. You can also visit https://acled.github.io/acledR/ for more information and walkthroughs on how to use the package. + + +# Overview + +This package provides tools to extract and manipulate data from the [Armed Conflict Location and Event Data Project (ACLED)](https://acleddata.com/). + +To access ACLED data, please register an account at [developer.acleddata.com](https://developer.acleddata.com). + +* You can visit our [guide](https://acleddata.com/acleddatanew//wp-content/uploads/2021/11/ACLED_Access-Guide_October-2020.pdf) on how to easily set up your ACLED account. +* We recommend you to save your ACLED API Key in an R object so you can easily re-use your key. + +## Installation (for private repo) + +Since the package repo is currently private, you need to tell R and Github that you're a collaborator. To do so, you first create a Github personal access token (PAT). You can set this to expire after a certain time (the default) or be permanent. 
We can initiate this process internally via: + +```{r, eval=F} +# install.packages("usethis") if not installed already +# create personal access token - this should redirect to your github page where you can copy the token +usethis::create_github_token() +``` + +After you've copied the PAT from the browser and saved it somewhere safe, return to R and run this, which will store the PAT locally. + +```{r, eval=F} +# paste the token where it says YourPAT +credentials::set_github_pat("YourPAT") +# if you run this, it should print your token; if not we've failed +Sys.getenv("GITHUB_PAT") +``` + +Now you can install the package and it will automatically locate your PAT. + +```{r, eval=F} +# install from github +devtools::install_github("ACLED/acledR") +``` + + +## Installation (for public use) + +The package is in the process of being submitted to the Comprehensive R Archive Network (CRAN). In the meantime, you can install the package from Github. +```{r, eval = F} +devtools::install_github("ACLED/acledR") ## if you are interested in a particular branch, please add a 'ref' argument. +``` + +Once the package is uploaded to CRAN, you can install it with the following command: +```{r, eval = F} +install.packages("acledR") +``` diff --git a/README.md b/README.md new file mode 100755 index 0000000..f71f5e3 --- /dev/null +++ b/README.md @@ -0,0 +1,80 @@ + + + +# acledR + + + +[![R-CMD-check](https://github.com/ACLED/acledR/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/ACLED/acledR/actions/workflows/R-CMD-check.yaml) +[![codecov](https://codecov.io/gh/ACLED/acledR/graph/badge.svg?token=TDJodXhEvx)](https://codecov.io/gh/ACLED/acledR) + + + +# Overview + +This package provides tools to extract and manipulate data from the +[Armed Conflict Location and Event Data Project +(ACLED)](https://acleddata.com/). + +To access ACLED data, please register an account at +[developer.acleddata.com](https://developer.acleddata.com). 
+ +- You can visit our + [guide](https://acleddata.com/acleddatanew//wp-content/uploads/2021/11/ACLED_Access-Guide_October-2020.pdf) + on how to easily set up your ACLED account. +- We recommend you to save your ACLED API Key in an R object so you can + easily re-use your key. + +## Installation (for private repo) + +Since the package repo is currently private, you need to tell R and +Github that you’re a collaborator. To do so, you first create a Github +personal access token (PAT). You can set this to expire after a certain +time (the default) or be permanent. We can initiate this process +internally via: + +``` r +# install.packages("usethis") if not installed already +# create personal access token - this should redirect to your github page where you can copy the token +usethis::create_github_token() +``` + +After you’ve copied the PAT from the browser, return to R and run this, +which will store the PAT locally. + +``` r +# paste the token where it says YourPAT +credentials::set_github_pat("YourPAT") +# if you run this, it should print your token; if not we've failed +Sys.getenv("GITHUB_PAT") +``` + +I recommend also setting the PAT in your `.Rprofile` so it’s stored for +all R sessions (i.e., you don’t have to save the PAT and paste it in +each time you re-install). + +``` r +# to set your PAT for all R sessions, run +file.edit(file.path("~", ".Rprofile")) +# and then paste credentials::set_github_pat("YourPAT") into the .Rprofile script +# save the file +``` + +Now you can install the package and it will automatically locate your +PAT. + +``` r +# install from github +devtools::install_github("ACLED/acledR") +``` + +## Installation (for public use) + +Until the acledR package gets added into the Comprehensive R Archive +Network (CRAN), users can utilize devtools to install the package from +Github. Thankfully, the installation is rather simple. 
You can install +it through the following code: + +``` r +devtools::install_github("ACLED/acledR") ## if you are interested in a particular branch, please add a 'ref' argument. +``` diff --git a/_pkgdown.yml b/_pkgdown.yml new file mode 100644 index 0000000..4c05c88 --- /dev/null +++ b/_pkgdown.yml @@ -0,0 +1,36 @@ +url: https://acled.github.io/acledR/ +template: + bootstrap: 5 + bootswatch: zephyr +navbar: + title: acledR + left: + - icon: fa-home + href: index.html + - text: Reference + href: reference/index.html + - text: Get Started + href: articles/get_started.html + - text: Utilizing acledR + href: index.html + menu: + - text: ACLED API + href: articles/acled_api.html + - text: Keeping your Dataset Updated + href: articles/acled_update.html + - text: acledR Data Manipulation + href: articles/acled_transformations.html +reference: +- title: Functions +- subtitle: API & Access + desc: Functions to aid gathering data +- contents: has_concept("API and Access") +- subtitle: Data Manipulation + desc: Manipulation of data provided by ACLED API. 
+ contents: has_concept("Data Manipulation") +- subtitle: Helpers + desc: Functions for general help on using acledR + contents: has_concept("Helpers") +- subtitle: Data + contents: has_concept("Data") + diff --git a/acledR.Rproj b/acledR.Rproj old mode 100644 new mode 100755 diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000..365b623 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,18 @@ +codecov: + token: e5ee5874-dfbb-4a73-8bd6-a42625bf9687 + + +comment: false + +coverage: + status: + project: + default: + target: auto + threshold: 1% + informational: true + patch: + default: + target: auto + threshold: 1% + informational: true diff --git a/data/acled_codebook.rda b/data/acled_codebook.rda new file mode 100755 index 0000000..31047c3 Binary files /dev/null and b/data/acled_codebook.rda differ diff --git a/data/acled_countries.rda b/data/acled_countries.rda new file mode 100755 index 0000000..84dfb69 Binary files /dev/null and b/data/acled_countries.rda differ diff --git a/data/acled_event_categories.rda b/data/acled_event_categories.rda new file mode 100755 index 0000000..7b4635e Binary files /dev/null and b/data/acled_event_categories.rda differ diff --git a/data/acled_interaction_codes.rda b/data/acled_interaction_codes.rda new file mode 100644 index 0000000..aaf1e08 Binary files /dev/null and b/data/acled_interaction_codes.rda differ diff --git a/data/acled_multipliers.rda b/data/acled_multipliers.rda new file mode 100644 index 0000000..751271d Binary files /dev/null and b/data/acled_multipliers.rda differ diff --git a/data/acled_old_deletion_dummy.rda b/data/acled_old_deletion_dummy.rda new file mode 100644 index 0000000..2ed5110 Binary files /dev/null and b/data/acled_old_deletion_dummy.rda differ diff --git a/data/acled_old_dummy.rda b/data/acled_old_dummy.rda new file mode 100644 index 0000000..212ad8c Binary files /dev/null and b/data/acled_old_dummy.rda differ diff --git a/data/acled_regions.rda b/data/acled_regions.rda old mode 100644 
new mode 100755 index 903400e..fd62a25 Binary files a/data/acled_regions.rda and b/data/acled_regions.rda differ diff --git a/man/acledR-package.Rd b/man/acledR-package.Rd new file mode 100644 index 0000000..995a86a --- /dev/null +++ b/man/acledR-package.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/acledR-package.R +\docType{package} +\name{acledR-package} +\alias{acledR} +\alias{acledR-package} +\title{acledR: Manipulate 'ACLED' Data} +\description{ +\if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} + +The package allows users to easily interact with 'ACLED' data by providing wrappers for the API and other functions to manipulate 'ACLED' data. +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://github.com/ACLED/acledR} + \item \url{https://acled.github.io/acledR/} + \item Report bugs at \url{https://github.com/ACLED/acledR/issues} +} + +} +\author{ +\strong{Maintainer}: ACLED \email{data@acleddata.com} [copyright holder] + +Authors: +\itemize{ + \item Lucas Fagliano \email{l.fagliano@acleddata.com} + \item Trey Billing \email{t.billing@acleddata.com} + \item Rachel Goodman \email{goodman.r@wustl.edu} + \item Katayoun Kishi \email{k.kishi@acleddata.com} + \item Michael Start \email{m.start@acleddata.com} +} + +} +\keyword{internal} diff --git a/man/acled_access.Rd b/man/acled_access.Rd new file mode 100755 index 0000000..53e13f7 --- /dev/null +++ b/man/acled_access.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/acled_access.R +\name{acled_access} +\alias{acled_access} +\title{Store your ACLED access information into your session.} +\usage{ +acled_access(email, key) +} +\arguments{ +\item{email}{This is the email that you register in the ACLED Access portal (https://developer.acleddata.com/)} + +\item{key}{This is the key generated by the ACLED Access portal.} +} +\description{ +Simple function to authenticate and 
store (through \code{Sys.setenv()}) ACLED access key for the acled_api() function. If email and key is stored via acled_access, the email and key arguments for acled_api can be NULL. +} +\examples{ +\dontrun{ +acled_access(email = "your_email", key = "your_key") +Sys.getenv("acled_email") +Sys.getenv("acled_key") +} +} +\seealso{ +ACLED API Access guide \url{https://acleddata.com/download/35300/} + +Other API and Access: +\code{\link{acled_api}()}, +\code{\link{acled_deletions_api}()}, +\code{\link{acled_update}()} +} +\concept{API and Access} diff --git a/man/acled_api.Rd b/man/acled_api.Rd new file mode 100755 index 0000000..cf25ca0 --- /dev/null +++ b/man/acled_api.Rd @@ -0,0 +1,96 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/acled_api.R +\name{acled_api} +\alias{acled_api} +\title{Request data from ACLED API} +\usage{ +acled_api( + email = NULL, + key = NULL, + country = NULL, + regions = NULL, + start_date = floor_date(Sys.Date(), "year") - years(1), + end_date = Sys.Date(), + timestamp = NULL, + event_types = NULL, + population = "none", + monadic = FALSE, + ..., + acled_access = TRUE, + prompt = TRUE, + log = F +) +} +\arguments{ +\item{email}{character string. Email associated with your ACLED account registered at \url{https://developer.acleddata.com}.} + +\item{key}{character string. Access key associated with your ACLED account registered at \url{https://developer.acleddata.com}.} + +\item{country}{character vector. Default is NULL, which will return events for all countries. Pass a vector of countries names to retrieve events from specific countries. The list of ACLED countries. names may be found via acledR::acled_countries.} + +\item{regions}{vector of region names (character) or region codes (numeric). Default is NULL, which will return events for all regions. Pass a vector of regions names or codes to retrieve events from countries. within specific regions. 
The list of ACLED regions may be found via acledR::acled_regions.} + +\item{start_date}{character string. Format 'yyyy-mm-dd'. The earliest date for which to return events. The default is \code{1997-01-01}, which is the earliest date available.} + +\item{end_date}{character string. Format 'yyyy-mm-dd'. The latest date for which to return events. The default is Sys.Date(), which is the most present date.} + +\item{timestamp}{numerical or character string. Provide a date or datetime written as either a character string of yyyy-mm-dd or as a numeric Unix timestamp to access all events added or updated after that date.} + +\item{event_types}{vector of one or more event types (character). Default is NULL, which will return data for all event types. To reurn data for only specific event types, request one or more of the following options (not case sensitive): Battles, Violence against civilians, Protests, Riots, Strategic Developments, and Explosions/Remote violence.} + +\item{population}{character. Specify whether to return population estimates for each event. It accepts three options: "none" (default), "best", and "full".} + +\item{monadic}{logical. If FALSE (default), returns dyadic data. If TRUE, returns monadic actor1 data.} + +\item{...}{string. Any additional parameters that users would like to add to their API calls (e.g. interaction or ISO)} + +\item{acled_access}{logical. If TRUE (default), you have used the acled_access function and the email and key arguments are not required.} + +\item{prompt}{logical. If TRUE (default), users will receive an interactive prompt providing information about their call (countries requested, number of estimated events, and number of API calls required) and asking if they want to proceed with the call. If FALSE, the call continues without warning, but the call is split and returns a message specifying how many calls are being made.} + +\item{log}{logical. 
If TRUE, it provides a dataframe with the countries and days requested, and how many calls it entails. The dataframe is provided INSTEAD of the normal ACLED dataset.} +} +\value{ +Returns a tibble of of ACLED events. +} +\description{ +This function allows users to easily request data from the ACLED API. Users can include variables such as country, regions, dates of interest and the format (monadic or dyadic). The function returns a tibble of the desired ACLED events. +} +\examples{ +\dontrun{ + +# Get all the events coded by ACLED in Argentina from 01/01/2022 until 02/01/2022 +# in dyadic-wide form +argen_acled <- acled_api( + email = "your_email", key = "your_key", + country = "Argentina", start_date = "2022-01-01", end_date = "2022-02-01", + acled_access = FALSE +) + +# tibble with all the events from Argentina where each row is one event. +argen_acled + +# Get all events coded by ACLED in the Caribbean from 01/01/2022 to 10/01/2022 +# in monadic-long form using email and key saved in environment + +acled_access(email = "your_email", key = "your_key") +carib_acled <- acled_api( + regions = "Caribbean", start_date = "2022-01-01", + end_date = "2022-01-10", monadic = TRUE, acled_access = TRUE +) + +## Tibble with all the events from the Caribbean where each row is one actor +carib_acled +} +} +\seealso{ +\itemize{ +\item ACLED API guide. 
\url{https://apidocs.acleddata.com/} +} + +Other API and Access: +\code{\link{acled_access}()}, +\code{\link{acled_deletions_api}()}, +\code{\link{acled_update}()} +} +\concept{API and Access} diff --git a/man/acled_codebook.Rd b/man/acled_codebook.Rd new file mode 100755 index 0000000..e90ce13 --- /dev/null +++ b/man/acled_codebook.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{acled_codebook} +\alias{acled_codebook} +\title{ACLED Codebook} +\format{ +A data frame: +\describe{ +\item{Variable}{Variable names} +\item{Description}{Text description of each variable} +\item{Values}{Text description of values for each variable}} +} +\usage{ +acled_codebook +} +\description{ +Codebook for ACLED data +} +\seealso{ +Other Data: +\code{\link{acled_countries}}, +\code{\link{acled_event_categories}}, +\code{\link{acled_interaction_codes}}, +\code{\link{acled_multipliers}}, +\code{\link{acled_old_deletion_dummy}}, +\code{\link{acled_old_dummy}}, +\code{\link{acled_regions}} +} +\concept{Data} +\keyword{datasets} diff --git a/man/acled_countries.Rd b/man/acled_countries.Rd new file mode 100755 index 0000000..e35a10e --- /dev/null +++ b/man/acled_countries.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{acled_countries} +\alias{acled_countries} +\title{ACLED Countries} +\format{ +A data frame: +\describe{ +\item{country}{Country names} +\item{region}{Region names} +\item{start_year}{First year coded by ACLED}} +} +\usage{ +acled_countries +} +\description{ +ACLED country names, regions, and coding start year +} +\seealso{ +Other Data: +\code{\link{acled_codebook}}, +\code{\link{acled_event_categories}}, +\code{\link{acled_interaction_codes}}, +\code{\link{acled_multipliers}}, +\code{\link{acled_old_deletion_dummy}}, +\code{\link{acled_old_dummy}}, +\code{\link{acled_regions}} +} +\concept{Data} +\keyword{datasets} 
diff --git a/man/acled_deletions_api.Rd b/man/acled_deletions_api.Rd new file mode 100755 index 0000000..3d4cfe2 --- /dev/null +++ b/man/acled_deletions_api.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/acled_deletions_api.R +\name{acled_deletions_api} +\alias{acled_deletions_api} +\title{Request data from the ACLED Deletions API} +\usage{ +acled_deletions_api( + email = NULL, + key = NULL, + date_deleted = NULL, + acled_access = TRUE, + log = F +) +} +\arguments{ +\item{email}{character string. Email associated with your ACLED account registered at \url{https://developer.acleddata.com}.} + +\item{key}{character string. Access key associated with your ACLED account registered at \url{https://developer.acleddata.com}.} + +\item{date_deleted}{character string. Format 'yyyy-mm-dd' or Unix timestamp. The query will return all deleted events including and after the requested date/timestamp.} + +\item{acled_access}{logical. If TRUE it means that you have utilized the acled_access function and there is no need for the email and key arguments.} + +\item{log}{Only for testing purposes: you can use this to check if all the variables in your call were handled properly.} +} +\value{ +Returns a tibble of ACLED data with columns for event_id_cnty and deleted_timestamp. +} +\description{ +This function allows users to pull deleted ACLED event IDs from the Deletions API. 
+} +\examples{ +\dontrun{ + +# Request deleted ACLED events since January 1, 2022 +acled_deletions_api(date_deleted = "2022-01-01", acled_acess = TRUE) +} +} +\seealso{ +\itemize{ +\item \href{https://acleddata.com/download/35306/}{ACLED API guide} +\item \href{https://acleddata.com/download/35179/}{Keeping ACLED data up to date guide} +} + +Other API and Access: +\code{\link{acled_access}()}, +\code{\link{acled_api}()}, +\code{\link{acled_update}()} +} +\concept{API and Access} diff --git a/man/acled_event_categories.Rd b/man/acled_event_categories.Rd new file mode 100755 index 0000000..2fb2f8b --- /dev/null +++ b/man/acled_event_categories.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{acled_event_categories} +\alias{acled_event_categories} +\title{ACLED Event Categories} +\format{ +A data frame: +\describe{ +\item{event_type}{ACLED event type} +\item{sub_event_type}{ACLED sub-event type} +\item{political_violence}{Dummy indicator for whether sub-event type falls within political violence} +\item{organized_political_violence}{Dummy indicator for whether sub-event type falls within organized political violence} +\item{disorder}{Dummy indicator for whether sub-event type falls within disorder} +\item{demonstrations}{Dummy indicator for whether sub-event type falls within demonstrations}} +} +\usage{ +acled_event_categories +} +\description{ +ACLED event and sub-event types, grouped by category +} +\seealso{ +Other Data: +\code{\link{acled_codebook}}, +\code{\link{acled_countries}}, +\code{\link{acled_interaction_codes}}, +\code{\link{acled_multipliers}}, +\code{\link{acled_old_deletion_dummy}}, +\code{\link{acled_old_dummy}}, +\code{\link{acled_regions}} +} +\concept{Data} +\keyword{datasets} diff --git a/man/acled_interaction_codes.Rd b/man/acled_interaction_codes.Rd new file mode 100644 index 0000000..615a13d --- /dev/null +++ b/man/acled_interaction_codes.Rd @@ -0,0 +1,30 @@ +% 
Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{acled_interaction_codes} +\alias{acled_interaction_codes} +\title{ACLED interaction codes} +\format{ +A data frame: +\describe{ +\item{Inter1/Inter2}{Actor type} +\item{Numeric Code}{Numeric equivalent found in the inter1 and inter2 column. }} +} +\usage{ +acled_interaction_codes +} +\description{ +ACLED interaction and actor types +} +\seealso{ +Other Data: +\code{\link{acled_codebook}}, +\code{\link{acled_countries}}, +\code{\link{acled_event_categories}}, +\code{\link{acled_multipliers}}, +\code{\link{acled_old_deletion_dummy}}, +\code{\link{acled_old_dummy}}, +\code{\link{acled_regions}} +} +\concept{Data} +\keyword{datasets} diff --git a/man/acled_multipliers.Rd b/man/acled_multipliers.Rd new file mode 100644 index 0000000..85d665a --- /dev/null +++ b/man/acled_multipliers.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{acled_multipliers} +\alias{acled_multipliers} +\title{ACLED Multipliers} +\format{ +A data frame: +\describe{ +\item{country}{Country names} +\item{bin}{Bin of event frequency} +\item{year}{Year corresponding to the bin} +\item{avg_month_bin}{Average monthly of the bin} +} +} +\usage{ +acled_multipliers +} +\description{ +A dataframe with additional information for each country, only for the purpose of estimating events. 
+} +\seealso{ +Other Data: +\code{\link{acled_codebook}}, +\code{\link{acled_countries}}, +\code{\link{acled_event_categories}}, +\code{\link{acled_interaction_codes}}, +\code{\link{acled_old_deletion_dummy}}, +\code{\link{acled_old_dummy}}, +\code{\link{acled_regions}} +} +\concept{Data} +\keyword{datasets} diff --git a/man/acled_old_deletion_dummy.Rd b/man/acled_old_deletion_dummy.Rd new file mode 100644 index 0000000..3cc392e --- /dev/null +++ b/man/acled_old_deletion_dummy.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{acled_old_deletion_dummy} +\alias{acled_old_deletion_dummy} +\title{Second dummy data frame of ACLED events emulating an old format, used in acled_deletion_api Vignette} +\format{ +A data frame: +\describe{ +\item{event_id_cnty}{An unique individual identifier by number and country acronym (updated annually)} +\item{event_date}{The day, month and year on which an event took place} +\item{year}{The year in which an event took place} +\item{time_precision}{A numeric code indicating the level of certainty of the date coded for the event} +\item{disorder_type}{Type of disorder associated with the event and sub event type} +\item{event_type}{The type of event} +\item{sub_event_type}{The type of sub-event} +\item{actor1}{The named actor involved in the event. Note: Actor 1 and Actor 2 do not imply directionality (e.g. attacker or defender)} +\item{assoc_actor_1}{The named actor associated with or identifying actor1} +\item{inter1}{A numeric code indicating the type of actor1} +\item{actor2}{The named actor involved in the event. Note: Actor 1 and Actor 2 do not imply directionality (e.g. 
attacker or defender)} +\item{assoc_actor_2}{The named actor associated with or identifying actor2} +\item{inter2}{A numeric code indicating the type of actor2} +\item{interaction}{A numeric code indicating the interaction between types of actor1 and actor2} +\item{civilian_targeting}{Column referencing the presence of civilian targeting} +\item{iso}{A numeric code for each individual country} +\item{region}{The region of the world where the event took place} +\item{country}{The country in which the event took place} +\item{admin1}{The largest sub-national administrative region in which the event took place} +\item{admin2}{The second largest sub-national administrative region in which the event took place} +\item{admin3}{The third largest sub-national administrative region in which the event took place} +\item{location}{The location in which the event took place} +\item{latitude}{The latitude of the location} +\item{longitude}{The longitude of the location} +\item{geo_precision}{A numeric code indicating the level of certainty of the location coded for the event} +\item{source}{The source of the event report} +\item{source_scale}{The scale (local, regional, national, international) of the source} +\item{notes}{A short description of the event} +\item{fatalities}{The number of reported fatalities which occurred during the event} +\item{tags}{Tags associated with the event.} +\item{timestamp}{Numeric code of time}} +} +\usage{ +acled_old_deletion_dummy +} +\description{ +Large dataset of multiple regions and countries, purposefully including deleted/modified events. 
+} +\seealso{ +Other Data: +\code{\link{acled_codebook}}, +\code{\link{acled_countries}}, +\code{\link{acled_event_categories}}, +\code{\link{acled_interaction_codes}}, +\code{\link{acled_multipliers}}, +\code{\link{acled_old_dummy}}, +\code{\link{acled_regions}} +} +\concept{Data} +\keyword{datasets} diff --git a/man/acled_old_dummy.Rd b/man/acled_old_dummy.Rd new file mode 100644 index 0000000..f5e3703 --- /dev/null +++ b/man/acled_old_dummy.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{acled_old_dummy} +\alias{acled_old_dummy} +\title{A dummy data frame of ACLED events emulating an old format, used in "Keeping your dataset updated" Vignette} +\format{ +A data frame: +\describe{ +\item{event_id_cnty}{An unique individual identifier by number and country acronym (updated annually)} +\item{event_date}{The day, month and year on which an event took place} +\item{year}{The year in which an event took place} +\item{time_precision}{A numeric code indicating the level of certainty of the date coded for the event} +\item{disorder_type}{Type of disorder associated with the event and sub event type} +\item{event_type}{The type of event} +\item{sub_event_type}{The type of sub-event} +\item{actor1}{The named actor involved in the event. Note: Actor 1 and Actor 2 do not imply directionality (e.g. attacker or defender)} +\item{assoc_actor_1}{The named actor associated with or identifying actor1} +\item{inter1}{A numeric code indicating the type of actor1} +\item{actor2}{The named actor involved in the event. Note: Actor 1 and Actor 2 do not imply directionality (e.g. 
attacker or defender)} +\item{assoc_actor_2}{The named actor associated with or identifying actor2} +\item{inter2}{A numeric code indicating the type of actor2} +\item{interaction}{A numeric code indicating the interaction between types of actor1 and actor2} +\item{civilian_targeting}{Column referencing the presence of civilian targeting} +\item{iso}{A numeric code for each individual country} +\item{region}{The region of the world where the event took place} +\item{country}{The country in which the event took place} +\item{admin1}{The largest sub-national administrative region in which the event took place} +\item{admin2}{The second largest sub-national administrative region in which the event took place} +\item{admin3}{The third largest sub-national administrative region in which the event took place} +\item{location}{The location in which the event took place} +\item{latitude}{The latitude of the location} +\item{longitude}{The longitude of the location} +\item{geo_precision}{A numeric code indicating the level of certainty of the location coded for the event} +\item{source}{The source of the event report} +\item{source_scale}{The scale (local, regional, national, international) of the source} +\item{notes}{A short description of the event} +\item{fatalities}{The number of reported fatalities which occurred during the event} +\item{tags}{Tags associated with the event.} +\item{timestamp}{Numeric code of time}} +} +\usage{ +acled_old_dummy +} +\description{ +Small dataset of events in Argentina, purposefully including events which are currently deleted/modified. 
+} +\seealso{ +Other Data: +\code{\link{acled_codebook}}, +\code{\link{acled_countries}}, +\code{\link{acled_event_categories}}, +\code{\link{acled_interaction_codes}}, +\code{\link{acled_multipliers}}, +\code{\link{acled_old_deletion_dummy}}, +\code{\link{acled_regions}} +} +\concept{Data} +\keyword{datasets} diff --git a/man/acled_regions.Rd b/man/acled_regions.Rd new file mode 100755 index 0000000..ad3baa9 --- /dev/null +++ b/man/acled_regions.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{acled_regions} +\alias{acled_regions} +\title{ACLED Regions} +\format{ +A data frame: +\describe{ +\item{region}{Region number} +\item{region_name}{Region names} +\item{first_event_date}{First date (yyyy-mm-dd) coded by ACLED}} +} +\usage{ +acled_regions +} +\description{ +ACLED region names, region numbers, and coding start dates +} +\seealso{ +Other Data: +\code{\link{acled_codebook}}, +\code{\link{acled_countries}}, +\code{\link{acled_event_categories}}, +\code{\link{acled_interaction_codes}}, +\code{\link{acled_multipliers}}, +\code{\link{acled_old_deletion_dummy}}, +\code{\link{acled_old_dummy}} +} +\concept{Data} +\keyword{datasets} diff --git a/man/acled_rounding.Rd b/man/acled_rounding.Rd new file mode 100755 index 0000000..4b8e224 --- /dev/null +++ b/man/acled_rounding.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/acled_rounding.R +\name{acled_rounding} +\alias{acled_rounding} +\title{Rounding function} +\usage{ +acled_rounding(num, digits = 0) +} +\arguments{ +\item{num}{int. This is the number we are trying to round.} + +\item{digits}{int. Where do we want to round up. It accepts 0 (whole number), 1 (tenth place), 2 (hundredths), etc.} +} +\description{ +This function addresses some of the conflicts of rounding in R, especially when trying to round up. 
+} +\details{ +This function is meant to address the problem of rounding in R where the approach is always round to even. The function is meant to round things following the simple rule. If the decimal is 5+ then round up, if not round down. With the 'digits' argument, one can set up the specificity of the rounding, 0 = whole number, 1 = tenths place, 2 = hundredths place, and so on. +} +\examples{ +x1 <- 1.569 +x2 <- 104.530 +x3 <- 54.430 +x4 <- 205.49999 +acled_rounding(x1) +acled_rounding(x2) +acled_rounding(x3) +acled_rounding(x4) +} +\concept{Helpers} diff --git a/man/acled_transform_interaction.Rd b/man/acled_transform_interaction.Rd new file mode 100644 index 0000000..c59dc15 --- /dev/null +++ b/man/acled_transform_interaction.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/acled_transform_interaction.R +\name{acled_transform_interaction} +\alias{acled_transform_interaction} +\title{Change interaction codes from numeric labels to string labels} +\usage{ +acled_transform_interaction(df, only_inters = F) +} +\arguments{ +\item{df}{dataframe. ACLED data including at least inter1, inter2 columns. If \code{only_inters} is TRUE, it also requires interaction column.} + +\item{only_inters}{boolean. Option whether to include the \emph{interaction} column in the transformation (if TRUE) or to only use \emph{inter1} and \emph{inter2} (if FALSE).} +} +\value{ +Returns a tibble of ACLED events with modified \emph{inter1}, \emph{inter2} and potentially \emph{interaction} columns. +} +\description{ +This function allows users to change from numeric interaction codes (i.e. 1, 2, 3, etc) to string interaction codes (i.e. State Forces, Rebel Group, etc.) 
+} +\examples{ +\dontrun{ + +# Load data frame +argen_acled <- acled_api( + email = "your_email", key = "your_key", + country = "Argentina", start_date = "2022-01-01", end_date = "2022-02-01", + acled_access = FALSE +) + +# Transform the interactions +argen_acled_transformed <- acled_transform_interaction(argen_acled, only_inters = F) +} +} +\seealso{ +Other Data Manipulation: +\code{\link{acled_transform_longer}()}, +\code{\link{acled_transform_wider}()} +} +\concept{Data Manipulation} diff --git a/man/acled_transform_longer.Rd b/man/acled_transform_longer.Rd new file mode 100644 index 0000000..9a35bda --- /dev/null +++ b/man/acled_transform_longer.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/acled_transform_longer.R +\name{acled_transform_longer} +\alias{acled_transform_longer} +\title{Transform ACLED data from wide to long} +\usage{ +acled_transform_longer(data, type = "full_actors") +} +\arguments{ +\item{data, }{dataframe or tibble containing your dataset.} + +\item{type, }{character string. One of five types: full_actors, main_actors, assoc_actors, source, or all. +\itemize{ +\item full_actors: All actor and associated actor columns +\item main_actors: Actor 1 and Actor 2 columns +\item assoc_actors: All associated actor columns +\item source: The source column becomes monadic +}} +} +\value{ +A tibble with the data transformed into long form. +} +\description{ +Function to convert your ACLED's API calls (if dyadic) into desired monadic forms. 
+} +\examples{ +\dontrun{ +# argen_acled <- acled_api(country = "Argentina",start_date = "2022-01-01", +# end_date="2022-02-01", acled_access = T, prompt = F) + +# argen_acled_long_actors <- acled_transform_longer(argen_acled, +# type = "full_actors") # Transforming the data + +# nrow(argen_acled_long_actors) # Number of rows in the dataset +# [1] 263 # Long form + +# nrow(argen_acled) # Number of rows in the dataset +# [1] 145 # Wide form +} +} +\seealso{ +Other Data Manipulation: +\code{\link{acled_transform_interaction}()}, +\code{\link{acled_transform_wider}()} +} +\concept{Data Manipulation} diff --git a/man/acled_transform_wider.Rd b/man/acled_transform_wider.Rd new file mode 100644 index 0000000..66fe706 --- /dev/null +++ b/man/acled_transform_wider.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/acled_transform_wider.R +\name{acled_transform_wider} +\alias{acled_transform_wider} +\title{Reverse Transform ACLED Data from Long to Wide} +\usage{ +acled_transform_wider(data, type = "full_actors") +} +\arguments{ +\item{data, }{a dataframe or tibble containing your dataset.} + +\item{type, }{a character string. One of five types: full_actors, main_actors, assoc_actors, source, or all. +\itemize{ +\item full_actors: All actor and associated actor columns +\item main_actors: Actor 1 and Actor 2 columns +\item assoc_actors: All associated actor columns +\item source: The source column becomes dyadic +\item api_monadic: Use this option for data that is the output of the API's monadic option. +}} +} +\value{ +A tibble with the data transformed back into wide form. +} +\description{ +Function to convert your ACLED's API calls (if monadic) back into the original dyadic forms. 
+} +\examples{ +\dontrun{ +# argen_acled <- acled_api(country = "Argentina",start_date = "2022-01-01", +# end_date="2022-02-01", acled_access = T, prompt = F) +# argen_acled_long_actors <- acled_transform_longer(argen_acled, +# type = "full_actors") # Transforming the data to long form + +# argen_acled_wide <- acled_transform_wider(argen_acled_long_actors, +# type = "full_actors") # Transforming the data back to wide form + +# nrow(argen_acled_wide) # Number of rows in the dataset +# [1] 145 # Wide form + +# nrow(argen_acled_long_actors) # Number of rows in the dataset +# [1] 263 # Long form +} +} +\seealso{ +Other Data Manipulation: +\code{\link{acled_transform_interaction}()}, +\code{\link{acled_transform_longer}()} +} +\concept{Data Manipulation} diff --git a/man/acled_update.Rd b/man/acled_update.Rd new file mode 100644 index 0000000..0c54a4e --- /dev/null +++ b/man/acled_update.Rd @@ -0,0 +1,75 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/acled_update.R +\name{acled_update} +\alias{acled_update} +\title{Updating your ACLED dataset} +\usage{ +acled_update( + df, + start_date = min(df$event_date), + end_date = max(df$event_date), + additional_countries = "current countries", + regions = NULL, + event_types = NULL, + acled_access = TRUE, + email = NULL, + key = NULL, + deleted = TRUE, + prompts = TRUE +) +} +\arguments{ +\item{df}{The dataframe to update, it has to have the same structure as ACLED's dyadic dataframe (i.e. the result of \code{acled_api()})} + +\item{start_date}{The first date of events you want to update from. These are the ceiling and floor of \emph{event_date}, not of \emph{timestamp}.} + +\item{end_date}{The last date of events you want to update from. These are the ceiling and floor of \emph{event_date}, not of \emph{timestamp}.} + +\item{additional_countries}{string. Additional countries to update your dataset. 
It defaults to “current countries”, which includes all the additional_countries inside your dataset.} + +\item{regions}{string. The regions for which you would like events in your dataset updated.} + +\item{event_types}{string. The event types for which you would like events in your dataset updated.} + +\item{acled_access}{logical. If you have already used \code{acled_access()}, you can set this option as TRUE (default) to avoid having to input your email and access key.} + +\item{email}{character string. Email associated with your ACLED account registered at \url{https://developer.acleddata.com}.} + +\item{key}{character string. Access key associated with your ACLED account registered at \url{https://developer.acleddata.com}.} + +\item{deleted}{logical. If TRUE (default), the function will also remove deleted events using acled_deletions_api().} + +\item{prompts}{logical. If TRUE (default), users will receive an interactive prompt providing information about their call (additional_countries requested, number of country-days, and number of API calls required) and asking if they want to proceed with the call. If FALSE, the call continues without warning, but the call is split and returns a message specifying how many calls are being made.} +} +\value{ +Tibble with updated ACLED data and a newer timestamp. +} +\description{ +This function is meant to help you keep your dataset updated, by automatically checking for new and modified events, as well as deleted events (if deleted = TRUE). +Note: The function makes new API calls to gather new and modified events. +} +\examples{ +\dontrun{ +# Updating dataset to include newer data from Argentina + +acledR::acled_access(email = "your_email", key = "your_key") + +new_argen_dataset <- acled_update(acledR::acled_old_dummy, + additional_countries = "Argentina", + acled_access = TRUE, + prompts = FALSE +) +} + +} +\seealso{ +\itemize{ +\item ACLED Keeping your dataset updated guide. 
\url{https://acleddata.com/download/35179/} +} + +Other API and Access: +\code{\link{acled_access}()}, +\code{\link{acled_api}()}, +\code{\link{acled_deletions_api}()} +} +\concept{API and Access} diff --git a/man/figures/logo.png b/man/figures/logo.png new file mode 100644 index 0000000..4cfa42e Binary files /dev/null and b/man/figures/logo.png differ diff --git a/man/generate_counts.Rd b/man/generate_counts.Rd deleted file mode 100644 index b3740b2..0000000 --- a/man/generate_counts.Rd +++ /dev/null @@ -1,40 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/generate_counts.R -\name{generate_counts} -\alias{generate_counts} -\title{Generate event counts from ACLED data} -\usage{ -generate_counts( - data, - event_type = NULL, - unit_id, - time_id, - time_target, - start_date = NULL, - end_date = NULL, - add_unit_ids = NULL -) -} -\arguments{ -\item{data}{ACLED data} - -\item{event_type}{Event types to include} - -\item{unit_id}{Unit variable} - -\item{time_id}{Temporal variable} - -\item{time_target}{Target temporal unit} - -\item{start_date}{Earliest date to include} - -\item{end_date}{Latest date to include} - -\item{add_unit_ids}{Option to add in units with no events at certain time periods} -} -\value{ -Returns a tibble grouped by unit_id -} -\description{ -Generate event counts from ACLED data -} diff --git a/man/generate_movers.Rd b/man/generate_movers.Rd deleted file mode 100644 index 54ac2c1..0000000 --- a/man/generate_movers.Rd +++ /dev/null @@ -1,36 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/generate_movers.R -\name{generate_movers} -\alias{generate_movers} -\title{Generate event counts from ACLED data} -\usage{ -generate_movers( - data, - var, - unit_id, - time_id, - slide_funs, - slide_periods, - na.rm = T, - complete = T -) -} -\arguments{ -\item{data}{ACLED data.} - -\item{unit_id}{Unit variable.} - -\item{time_id}{Temporal variable.} - -\item{slide_funs}{Requested 
moving statistics. Character vector with options including mean, median, min, and max.} - -\item{slide_periods}{How many periods in the past to summarize over. Vector of one or more integers. Inf includes all previous periods.} - -\item{na.rm}{Whether to include NAs in the calculations.} -} -\value{ -Returns a tibble grouped by unit_id. -} -\description{ -Generate event counts from ACLED data -} diff --git a/man/pipe.Rd b/man/pipe.Rd new file mode 100755 index 0000000..1f8f237 --- /dev/null +++ b/man/pipe.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-pipe.R +\name{\%>\%} +\alias{\%>\%} +\title{Pipe operator} +\usage{ +lhs \%>\% rhs +} +\arguments{ +\item{lhs}{A value or the magrittr placeholder.} + +\item{rhs}{A function call using the magrittr semantics.} +} +\value{ +The result of calling `rhs(lhs)`. +} +\description{ +See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. +} +\keyword{internal} diff --git a/pkgdown/favicon/apple-touch-icon-120x120.png b/pkgdown/favicon/apple-touch-icon-120x120.png new file mode 100644 index 0000000..0738672 Binary files /dev/null and b/pkgdown/favicon/apple-touch-icon-120x120.png differ diff --git a/pkgdown/favicon/apple-touch-icon-152x152.png b/pkgdown/favicon/apple-touch-icon-152x152.png new file mode 100644 index 0000000..2ef1fae Binary files /dev/null and b/pkgdown/favicon/apple-touch-icon-152x152.png differ diff --git a/pkgdown/favicon/apple-touch-icon-180x180.png b/pkgdown/favicon/apple-touch-icon-180x180.png new file mode 100644 index 0000000..5896072 Binary files /dev/null and b/pkgdown/favicon/apple-touch-icon-180x180.png differ diff --git a/pkgdown/favicon/apple-touch-icon-60x60.png b/pkgdown/favicon/apple-touch-icon-60x60.png new file mode 100644 index 0000000..4ad0d4e Binary files /dev/null and b/pkgdown/favicon/apple-touch-icon-60x60.png differ diff --git a/pkgdown/favicon/apple-touch-icon-76x76.png b/pkgdown/favicon/apple-touch-icon-76x76.png new 
file mode 100644 index 0000000..8aa3370 Binary files /dev/null and b/pkgdown/favicon/apple-touch-icon-76x76.png differ diff --git a/pkgdown/favicon/apple-touch-icon.png b/pkgdown/favicon/apple-touch-icon.png new file mode 100644 index 0000000..5d55712 Binary files /dev/null and b/pkgdown/favicon/apple-touch-icon.png differ diff --git a/pkgdown/favicon/favicon-16x16.png b/pkgdown/favicon/favicon-16x16.png new file mode 100644 index 0000000..f6d3291 Binary files /dev/null and b/pkgdown/favicon/favicon-16x16.png differ diff --git a/pkgdown/favicon/favicon-32x32.png b/pkgdown/favicon/favicon-32x32.png new file mode 100644 index 0000000..fda73d9 Binary files /dev/null and b/pkgdown/favicon/favicon-32x32.png differ diff --git a/pkgdown/favicon/favicon.ico b/pkgdown/favicon/favicon.ico new file mode 100644 index 0000000..5c9b8b3 Binary files /dev/null and b/pkgdown/favicon/favicon.ico differ diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..eba0dc5 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,12 @@ +# This file is part of the standard setup for testthat. +# It is recommended that you do not modify it. +# +# Where should you do additional test configuration? 
+# Learn more about the roles of various files in: +# * https://r-pkgs.org/tests.html +# * https://testthat.r-lib.org/reference/test_package.html#special-files + +library(testthat) +library(acledR) + +test_check("acledR") diff --git a/tests/testthat/setup-acled_access.R b/tests/testthat/setup-acled_access.R new file mode 100644 index 0000000..b492d43 --- /dev/null +++ b/tests/testthat/setup-acled_access.R @@ -0,0 +1,5 @@ + +# Setup for test acled_access + +# Run the function to set up the enviornment credentials +acled_access(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY")) diff --git a/tests/testthat/setup-acled_api.R b/tests/testthat/setup-acled_api.R new file mode 100644 index 0000000..a22bfea --- /dev/null +++ b/tests/testthat/setup-acled_api.R @@ -0,0 +1,42 @@ +# Helpers for test-acled_api.R + +received_data <- acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"),country="Argentina", start_date="2022-01-01",end_date = "2022-12-31",prompt = F, acled_access = T, log = F) +received_data_monadic <- acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"),country = "Argentina", start_date="2022-01-01",end_date = "2022-12-31",prompt = F, monadic = T, acled_access = F, log = F) +log_received_data <- acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"),regions = c("Western Africa", "Eastern Africa", "Europe"), start_date="2022-01-01",end_date = "2022-12-31",prompt = F, acled_access = F, log = T) +received_data_numeric_region <- acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"),regions = 1,prompt = F, acled_access = F) + +timestamp_numeric_check <- acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + country = "Argentina", + start_date="2023-01-01",end_date = "2023-06-06", + timestamp = 1681622333, # as numeric + prompt = F, acled_access = F, log = F) + 
+timestamp_string_check <- acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + country = "Argentina", + start_date="2023-01-01",end_date = "2023-06-06", + timestamp = "2023-04-16", # as string + prompt = F, acled_access = F, log = F) + +# For checking credentials +log_received_data_check_credential <- acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + regions = c("Western Africa", "Eastern Africa", "Europe"),end_date = "2022-12-31",prompt = F, acled_access = T, log = T) + + +columns <- c("event_id_cnty","event_date","year","time_precision","disorder_type", + "event_type","sub_event_type","actor1","assoc_actor_1","inter1","actor2","assoc_actor_2", + "inter2","interaction","civilian_targeting","iso","region","country","admin1","admin2","admin3","location","latitude", + "longitude","geo_precision","source","source_scale","notes","fatalities","tags","timestamp") + +received_data_country_and_region <- acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + country = "Argentina", + regions = "Central America", + start_date="2022-01-01", + end_date = "2022-12-31", + prompt = F, acled_access = F) + +received_data_country_and_region_num <- acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + country = "Argentina", + regions = 14, + start_date="2022-01-01", + end_date = "2022-12-31", + prompt = F, acled_access = F) diff --git a/tests/testthat/setup-acled_deletions_api.R b/tests/testthat/setup-acled_deletions_api.R new file mode 100644 index 0000000..46ce160 --- /dev/null +++ b/tests/testthat/setup-acled_deletions_api.R @@ -0,0 +1,9 @@ +# Helpers for test-acled_deletions_api.R + +received_deleted_data_date <- acled_deletions_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), date_deleted = "2022-07-25", acled_access = F) +received_deleted_data_unix <- acled_deletions_api(email = 
Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), date_deleted = "1658707200", acled_access = F) + + +received_deleted_log <- acled_deletions_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), date_deleted = "1658707200", acled_access = F, log = T) + +columns_deleted <- c("event_id_cnty", "deleted_timestamp") diff --git a/tests/testthat/setup-acled_transform_interaction.R b/tests/testthat/setup-acled_transform_interaction.R new file mode 100644 index 0000000..de2aa0c --- /dev/null +++ b/tests/testthat/setup-acled_transform_interaction.R @@ -0,0 +1,32 @@ +test <- acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + country = "Argentina", start_date="2022-01-01",end_date = "2022-12-31", + prompt = F, acled_access = F, log = F) + +test_changes <- test %>% + left_join(acledR::acled_interaction_codes, by = c("inter1" = "Numeric Code")) %>% + select(-inter1) %>% + rename(inter1 = "Inter1/Inter2") %>% + relocate(inter1, .after = assoc_actor_1) %>% + left_join(acledR::acled_interaction_codes, by = c("inter2" = "Numeric Code")) %>% + select(-inter2) %>% + rename(inter2 = "Inter1/Inter2") %>% + relocate(inter2, .after = assoc_actor_2) %>% + mutate(interaction = case_when( + str_detect(interaction, "10") ~ "Sole State Forces", + str_detect(interaction, "20") ~ "Sole Rebel Groups", + str_detect(interaction, "30") ~ "Sole Political Militias", + str_detect(interaction, "40") ~ "Sole Identity Militias", + str_detect(interaction, "50") ~ "Sole Rioters", + str_detect(interaction, "60") ~ "Sole Protesters", + str_detect(interaction, "70") ~ "Sole Civilians", + str_detect(interaction, "80") ~ "Sole Others", + TRUE ~ as.character(interaction))) %>% + mutate(interaction = str_replace_all(interaction, "(\\d)(\\d)", "\\1-\\2"), + interaction = str_replace(as.character(interaction), "1", "State Forces"), + interaction = str_replace(as.character(interaction), "2", "Rebel Groups"), + interaction = 
str_replace(as.character(interaction), "3", "Political Militias"), + interaction = str_replace(as.character(interaction), "4", "Identity Militias"), + interaction = str_replace(as.character(interaction), "5", "Rioters"), + interaction = str_replace(as.character(interaction), "6", "Protesters"), + interaction = str_replace(as.character(interaction), "7", "Civilians"), + interaction = str_replace(as.character(interaction), "8", "External/Other Forces")) diff --git a/tests/testthat/setup-acled_update.R b/tests/testthat/setup-acled_update.R new file mode 100644 index 0000000..a029022 --- /dev/null +++ b/tests/testthat/setup-acled_update.R @@ -0,0 +1,21 @@ + + + +dupes_checks <- acled_update(acledR::acled_old_dummy, + email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + acled_access = F, prompts = F) + +dupes_checks_plus_bramex <- acled_update(acledR::acled_old_dummy, + additional_countries = c("Brazil","Mexico"), + email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + acled_access = F, prompts = F) + + +test_more_than_one <- acled_update(acledR::acled_old_deletion_dummy, + email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + acled_access = F, prompts = F) + +find_deleted_events <- acled_deletions_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + date_deleted = max(acledR::acled_old_deletion_dummy$timestamp), + acled_access = F) + diff --git a/tests/testthat/test-acled_access.R b/tests/testthat/test-acled_access.R new file mode 100644 index 0000000..6e71693 --- /dev/null +++ b/tests/testthat/test-acled_access.R @@ -0,0 +1,20 @@ + +# Does it save the credentials in the enviornment? - Missing + +test_that("acled_access properly stores the credentials", { + expect_equal(Sys.getenv("acled_email"),"acledexamples@gmail.com") + expect_equal(nchar(Sys.getenv("acled_key")), 20) +}) + + +# Shows the message that it was successful? 
+test_that("It shows that it works", { + expect_message(acled_access(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY")), "Success! Credentials authorized") +}) + + +# Does it shows a message when it fails? +test_that("It shows that is doesn't work", { + expect_error(acled_access(email = "an@email!!!", key = "akey!!!"), + regex = "Key and email not authorized.*") +}) diff --git a/tests/testthat/test-acled_api.R b/tests/testthat/test-acled_api.R new file mode 100644 index 0000000..fb111db --- /dev/null +++ b/tests/testthat/test-acled_api.R @@ -0,0 +1,322 @@ +# acled_api unit testing + +# Basic functioning ---- +test_that("number of columns is correct", { + expect_equal(ncol(received_data),31) +}) + +test_that("names of columns are correct", { + expect_equal(names(received_data),columns) +}) + +## test if event_type filters work---- +test_that("event_type filters work or not",{ + + expect_equal(unique(acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + start_date="2022-01-01",end_date = "2022-12-31", country = "Argentina", + event_type = "Protests", prompt = F, acled_access = F, log = F)$event_type), "Protests" ) + +}) + +## Handling big calls ---- +test_that("Split calls for big calls", { + expect_equal(as.numeric(ceiling(sum(log_received_data$time)/300000)),max(log_received_data$calls)) +}) + +test_that("country days are calculated as expected",{ + argentina_country_days <- acledR::acled_countries %>% + filter(country == "Argentina") %>% + mutate(t_end = lubridate::ymd("2021-01-01"), + unit_test = t_end - ymd(paste0(start_year, "-01-01"))) + + argentina_test_call <- acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + country = "Argentina", start_date="1998-01-01", + end_date = "2021-01-01",prompt = F, acled_access = F, log = T) + + expect_equal(argentina_test_call$time, argentina_country_days$unit_test) + +}) + +local({ + + local_mocked_bindings(menu = 
function(choices,title=NULL) 1) + + test_that("Users continue call", { + + expect_equal(acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + start_date="2022-01-01",end_date = "2022-12-31",country = "Argentina", + prompt = T, acled_access = F, log = F), acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + start_date="2022-01-01",end_date = "2022-12-31",country = "Argentina", + prompt = F, acled_access = F, log = F)) + }) +}) # test on whether they can continue; credentials come from env vars, never hard-coded + +## Regions are managed properly ---- +test_that("Regions in numeric work",{ + expect_true(all.equal(data.frame(region="Western Africa",rows=1:nrow(received_data_numeric_region))$region,received_data_numeric_region$region))}) + +## Test what happens when someone requests a region and a country of another region ---- + +test_that("Testing that when requestion a region, and a country of another region, you get both",{ + + list_countries <- acledR::acled_countries %>% + filter(region == "Central America") %>% + unique(x=.$country) %>% + append("Argentina") + + + expect_setequal(unique(received_data_country_and_region$country), list_countries) + +}) + +test_that("When requesting a region with a numeric input, and a country of another region, you get both",{ + + list_countries <- acledR::acled_countries %>% + filter(region == "Central America") %>% + unique(x=.$country) %>% + append("Argentina") + + expect_setequal(unique(received_data_country_and_region_num$country), list_countries) + +}) + +## Timestamp works as required ---- + +test_that("timestamp (numeric) actually gets used as filter", { + expect_gte(min(timestamp_numeric_check$timestamp), 1681622333) +}) + +test_that("timestamp (string) actually gets used as filter", { + expect_gte(min(timestamp_string_check$timestamp), 1681588800) +}) + +## A menu is prompted when the user provides a non-recognized timestamp, allowing users to either stop or continue---- + +local({ + + 
local_mocked_bindings(menu = function(choices,title=NULL) 2) + + test_that("A user can stop a call when the provided timestamp is not recognized",{ + + expect_error(acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + start_date="2022-01-01",end_date = "2022-12-31",country = "Argentina", + timestamp = "muchachos", prompt = F, acled_access = F, log = F), regexp = "User requested") + }) +}) + +local({ + + local_mocked_bindings(menu = function(choices,title=NULL) 1) + + test_that("A user can ignore the provided timestamp if it is not recognized",{ + + expect_no_error(acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + start_date="2022-01-01",end_date = "2022-12-31",country = "Argentina", + timestamp = "muchachos", prompt = F, acled_access = F, log = F)) + }) +}) + + +## When asking for monadics, it returns monadics ---- + +test_that("The call actually returns monadics.", { + expect_equal(min(received_data_monadic$event_date), min(received_data$event_date)) + + expect_equal(max(received_data_monadic$event_date), max(received_data$event_date)) + + expect_equal(unique(received_data_monadic$country), unique(received_data$country)) + + expect_gte(nrow(received_data_monadic), nrow(received_data)) +}) + + +# Testing that population columns are returned when requested\ + +test_that("Population columns are being received", { + + population_cols <- c("population_1km","population_2km","population_5km","population_best") + + received_data_pops <- acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + country="Argentina", start_date="2022-01-01",end_date = "2022-01-04", + population='full',prompt = F, acled_access = F) + + received_data_best <- acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + country="Argentina", start_date="2022-01-01",end_date = "2022-01-04", + population='best',prompt = F, acled_access = F) + + + 
expect_true(all(population_cols %in% colnames(received_data_pops))) + expect_true("population_best" %in% colnames(received_data_best)) + +}) + + + + +# Errors ---- +## Error when someone requests a region that does not exist---- + +test_that("Error prompted when region does not exist", { + expect_error(acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"),regions = "Narnia", + start_date="2022-01-01",end_date = "2022-12-31",prompt = F, acled_access = F, log = F), regexp = "One or more requested region names not in the ACLED country list.") +}) + +test_that("Error when region number does not exist", { + expect_error(acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"),regions = 420, + start_date="2022-01-01",end_date = "2022-12-31",prompt = F, acled_access = F, log = F), + regexp = "One or more requested region numbers not in the ACLED country list") +}) + + +## Errors when a country requested doesnt exists ---- +test_that("Error when one of two countries are wrong",{ + expect_error(acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"),country = c("Argentia","Bolivia"), + start_date="2022-01-01",end_date = "2022-12-31",prompt = F, acled_access = F, log = F), + regexp = "One or more of the requested *")}) + +## Test what happens when someone inputs acled_access as TRUE but it includes email and key. ---- +test_that("Acled_access is ignored",{ + expect_true(grepl("acledexamples", log_received_data_check_credential$email[1])) +}) + +# Test errors from incorrectly input arguments. ---- + +test_that("acled_api() throws an error when called with invalid arguments", { + + expect_error(acled_api(Country = "Argentina", + start_date="2022-01-01", + end_date = "2022-12-31", + prompt = F, + acled_access = T, + log = F), regexp= + "Country is not a valid option. 
Please utilize \"country\", without capitalizing") + + + expect_error(acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + Region = "North America", + start_date="2022-01-01", + end_date = "2022-12-31", + prompt = F, + acled_access = T, + log = F), regexp= + "Region is not a valid option. Please utilize \"regions\"") + + expect_error(acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + Regions = "North America", + start_date="2022-01-01", + end_date = "2022-12-31", + prompt = F, + acled_access = T, + log = F), regexp= + "Regions is not a valid option. Please utilize \"regions\", without capitalizing") + + expect_error(acled_api(Event_type = "Argentina", + start_date="2022-01-01", + end_date = "2022-12-31", + prompt = F, + acled_access = T), regexp= + "Event type is not a valid option. Please utilize \"event_types\", without capitalizing") + expect_error(acled_api(country = "Argentina", + Start_date="2022-01-01", + end_date = "2022-12-31", + prompt = F, + acled_access = T), regexp= + "Start_date is not a valid option. Please utilize \"start_date\", without capitalizing") + expect_error(acled_api(country = "Argentina", + start_date="2022-01-01", + End_date = "2022-12-31", + prompt = F, + acled_access = T), regexp= + "End_date is not a valid option. Please utilize \"end_date\", without capitalizing") +}) + +# Test errors from badly utilized acled_access and key/email combination---- +test_that("If access is TRUE and credentials are null, credentials are ignored, but an error appears if Keys are empty in the enviornemt", { + expect_error( + Sys.setenv("acled_key" = "") %>% + acled_api(Country = "Argentina", + start_date="2022-01-01", + end_date = "2022-12-31", + prompt = F, + acled_access = T, + log = F), regexp = "acled_access is TRUE, but email and/or key are not stored in the enviornment. 
Please rerun acled_access or include key and email in function") +}) + +test_that("Users gets an error when acled_access is False, but no key or email are provided.", { + + expect_error(acled_api(country = "Argentina", start_date="2022-01-01", + end_date = "2022-12-31", prompt = F, acled_access = F), regexp = "Email address required") + + expect_error(acled_api(email = "stuff",country = "Argentina", start_date="2022-01-01", + end_date = "2022-12-31", prompt = F, acled_access = F), regexp = "Key required") + + + +}) + + +# Test error if start_date is after end_date ---- +test_that("start_date is after end_date", { + expect_error( + acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + country = "Argentina", + start_date="2022-01-01", + end_date = "2021-01-01", + prompt = F, + acled_access = F, + log = F), regexp = "Requested \'start_date\'")}) + +# Error when timestamp is from a date later than today ---- + +test_that("timestamp is from a latter date than today." 
,{ + + expect_error(acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + country = "Argentina", + start_date="2021-01-01", + end_date = "2022-01-01", + prompt = F, + acled_access = F, + timestamp = paste0(year(now())+1, "-01-01"), # Jan 1st of next year as "yyyy-mm-dd", so it is always in the future + log = F), regexp = "The timestamp cannot be" ) +}) + +# Error when requesting non-existent event types ---- + +test_that("Error when non existent event types",{ + expect_error(acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + country = "Argentina", + start_date="2021-01-01", + end_date = "2022-01-01", + event_types = c("Protests","Superhero fight"), + prompt = F, + acled_access = F, + log = F), regexp = "One or more requested event types are not in the ACLED data.") +}) + +# A message appears that acled_access is being ignored, and the proper credentials are being used.---- + +test_that("A warning appears that acled_access is being ignored, and the proper credentials are being used.",{ + + alog <- acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + acled_access = T, log = T) + + expect_message(acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + acled_access = T, log = T), regexp = "acled_access is TRUE, but email and key are included in the function. 
Ignoring acled_access.") + + expect_true(grepl("acledexamples", alog$email[1])) +}) + + + + +# Weird cases ---- + +# Ensure tables do not display blanks, and display NAs instead + +test_that("Tables display NAs instead of blanks", { + + expect_equal(nrow(filter(received_data, if_any(everything(), ~ sjmisc::is_empty(.x, all.na.empty = F)))), 0) + + +}) + diff --git a/tests/testthat/test-acled_deletions_api.R b/tests/testthat/test-acled_deletions_api.R new file mode 100644 index 0000000..1c52366 --- /dev/null +++ b/tests/testthat/test-acled_deletions_api.R @@ -0,0 +1,97 @@ +# Test for acled_deletions_api + +# Basic check ---- + +## Received data ---- +test_that("number of columns is correct - date", { + expect_equal(ncol(received_deleted_data_date),2) +}) + +test_that("number of columns is correct - unix", { + expect_equal(ncol(received_deleted_data_unix),2) +}) + +test_that("names of columns are correct - date", { + expect_equal(names(received_deleted_data_date),columns_deleted) +}) + +test_that("names of columns are correct - unix", { + expect_equal(names(received_deleted_data_unix),columns_deleted) +}) + +## Test that email and key are handled appropiately ---- + +test_that("Email and key are handled as expected without acled_access",{ + expect_true(grepl("acledexamples", received_deleted_log$email[1]))} +) + +## Test that acled_access is handled appropiately. ---- + +test_that("Email and Key are handled appropiately",{ + acled_access(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY")) + + some_log <- acled_deletions_api(date_deleted = "1658707200", acled_access = T, log = T) + + expect_true(grepl("acledexamples", some_log$email[1]))} +) + +## Date and unix return the same output ---- +test_that("names of columns are correct - unix", { + expect_true(all.equal(received_deleted_data_date, received_deleted_data_unix)) +}) + + + + +# Errors + +## If users do not set a timestamp, they will get all the deleted events in the dataset. 
---- + +test_that("users can opt not to include a timestamp and they will get all the possible events", { + + stuff <- acled_deletions_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), acled_access = F, log = F) + + expect_gt(nrow(stuff), 140150) +}) + +# Errors---- +## Errors from badly written credentials ---- + +test_that("Users get an error if the email or key are not provided", { + + expect_error(acled_deletions_api(date_deleted = "1658707200", acled_access = F, log = T), regexp = "Email address required for ACLED API access") + + expect_error(acled_deletions_api(key = "key", + date_deleted = "1658707200", acled_access = F, log = T), regexp = "Email address required for ACLED API access") + + expect_error(acled_deletions_api(email = "Email", + date_deleted = "1658707200", acled_access = F, log = T), regexp = 'Key required for ACLED API access' ) + +}) + + + +## Test errors from badly utilized acled_access and key/email combination ---- +test_that("If access is TRUE and credentials are null, credentials are ignored, but an error appears if Keys are empty in the enviornemt", { + expect_error( + Sys.setenv("acled_key" = "") %>% + acled_deletions_api(acled_access = T,log = F), regexp = "acled_access is TRUE, but email and/or key are not stored in the enviornment. Please rerun acled_access or include key and email in function") +}) + + + +## A message appears that acled_access is being ignored, and the proper credentials are being used. 
---- + +test_that("A warning appears that acled_access is being ignored, and the proper credentials are being used.",{ + + alog <- acled_deletions_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + date_deleted = "1658707200",acled_access = T, log = T) + + expect_message(acled_deletions_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + date_deleted = "1658707200",acled_access = T, log = T), regexp = "acled_access is TRUE, but email and key are included in the function. Ignoring acled_access.") + + expect_true(grepl("acledexamples", alog$email[1])) + }) + + + diff --git a/tests/testthat/test-acled_rounding.R b/tests/testthat/test-acled_rounding.R new file mode 100644 index 0000000..a3658c4 --- /dev/null +++ b/tests/testthat/test-acled_rounding.R @@ -0,0 +1,30 @@ + +test_that("acled_rounding behaves as expected", { + # Test cases where acled_rounding and round give different results + expect_equal(acled_rounding(1.5), 2) + expect_equal(acled_rounding(2.5), 3)} + ) + +test_that("acled_rounding gives the same result as round when rounding .6+",{ + # Test case where acled_rounding and round give the same result + expect_equal(acled_rounding(2.6), 3)} + ) + +test_that("Rounding to a specific decimal place",{ + + expect_equal(acled_rounding(1.56, 1), 1.6) + expect_equal(acled_rounding(2.34, 1), 2.3)} + ) + +test_that("Rounding to a specific decimal place where acled_rounding and round give different results",{ + expect_equal(acled_rounding(2.45, 1), 2.5)} + ) + +test_that("Works on negative numbers", { + expect_equal(acled_rounding(-2.5), -2)} + ) + +test_that("Integer is fine", { + # Test input that's already an integer + expect_equal(acled_rounding(2), 2)}) + diff --git a/tests/testthat/test-acled_transform_interaction.R b/tests/testthat/test-acled_transform_interaction.R new file mode 100644 index 0000000..25a1368 --- /dev/null +++ b/tests/testthat/test-acled_transform_interaction.R @@ -0,0 +1,36 @@ 
+# Check that it produces the same result as intended + + +test_that("The function swaps the interaction codes properly",{ + + expect_equal(acled_transform_interaction(test, only_inters = F), test_changes) + +}) + + +# Errors and warnings ---- + + +# Error when there is a wrong data structure +test_that("Returns an error when the function receives a df without inter1 or inter2", { + + expect_error(acled_transform_interaction(dplyr::select(test,-inter1)), regexp = "The input dataframe does not contain 'inter1' column") + + expect_error(acled_transform_interaction(dplyr::select(test,-inter2)), regexp = "The input dataframe does not contain 'inter2' column") + + expect_error(acled_transform_interaction(dplyr::select(test,-interaction)), regexp = "The input dataframe does not contain 'interaction' column") +}) + +# Error where are inter codes that are not recognized + +test_that("Returns an error when there are unrecognized inter codes", { + + + test3 <- test + test3[1,10] <- 9 + + expect_error(acled_transform_interaction(test3), regexp = "One or more interaction codes were not recognized.") +}) + + + diff --git a/tests/testthat/test-acled_transform_longer.R b/tests/testthat/test-acled_transform_longer.R new file mode 100644 index 0000000..1acdf47 --- /dev/null +++ b/tests/testthat/test-acled_transform_longer.R @@ -0,0 +1,63 @@ +# Test data type ---- +# Test that the function returns a tibble +test_that("returns a tibble", { + data <- data.frame(actor1 = c("Actor D"), actor2 = c("Actor C"), assoc_actor_1 = c("Actor A; Actor B"), assoc_actor_2 = c("Hello"),inter1 = "1", inter2="2",sub_event_type = "Protests", source_scale= "khapow",source= 'khapow') + output <- acled_transform_longer(data, "full_actors") + expect_s3_class(output, c("tbl_df", "data.frame")) +}) + +# Test functionalities ---- +# Test that the function returns the correct number of rows for full_actors type +test_that("returns correct number of rows for full_actors type", { + data <- data.frame(actor1 = 
c("Actor D"), actor2 = c("Actor C"), assoc_actor_1 = c("Actor A; Actor B"), assoc_actor_2 = c("Hello"), inter1 = "1", inter2="2",sub_event_type = "Protests", source_scale= "khapow",source= 'khapow') + output <- acled_transform_longer(data, "full_actors") + expect_equal(nrow(output), 5) +}) + +# Test that the function returns the correct number of rows for main_actors type +test_that("returns correct number of rows for main_actors type", { + data <- data.frame(actor1 = c("Actor D"), actor2 = c("Actor C"), assoc_actor_1 = c("Actor A; Actor B"), assoc_actor_2 = c(""),inter1 = "1", inter2="2", sub_event_type = "Protests", source_scale= "khapow",source= 'khapow') + output <- acled_transform_longer(data, "main_actors") + expect_equal(nrow(output), 2) +}) + +# Test that the function returns the correct number of rows for assoc_actors type +test_that("returns correct number of rows for assoc_actors type", { + data <- data.frame(actor1 = c("Actor D"), actor2 = c("Actor C"), assoc_actor_1 = c("Actor A; Actor B"), assoc_actor_2 = c("hello"), inter1 = "1", inter2="2",sub_event_type = "Protests", source_scale= "khapow",source= 'khapow') + output <- acled_transform_longer(data, "assoc_actors") + expect_equal(nrow(output), 3) +}) + +# Test that the function returns the correct number of rows for source type +test_that("returns correct number of rows for source type", { + data <- data.frame(actor1 = c("Actor D"), actor2 = c("Actor C"), assoc_actor_1 = c("Actor A; Actor B"), assoc_actor_2 = c("Hello"), inter1 = "1", inter2="2",sub_event_type = "Protests", source_scale= "khapow", source = c("Source A; Source B")) + output <- acled_transform_longer(data, "source") + expect_equal(nrow(output), 2) +}) + + +# Test errors---- +# Test that the function throws an error for an invalid type parameter +test_that("throws an error for invalid type parameter", { + data <- data.frame(actor1 = c("Actor A; Actor B"), actor2 = c("Actor C"), assoc_actor_1 = c("Actor D"), assoc_actor_2 = c(""),inter1 
= "1", inter2="2") + expect_error(acled_transform_longer(data, "invalid_type")) +}) + +# Test that if columns are missing, it will throw an error for an invalid data frame + +test_that("if columns are missing, it will throw an error for an invalid data frame", { + data <- data.frame(actor1 = c("Actor A; Actor B"), actor2 = c("Actor C"),inter1 = "1", inter2="2", assoc_actor_1 = c("Actor D"), assoc_actor_2 = c("")) + expect_error(acled_transform_longer(data, "full_actors"), "Some columns are missing. Please make sure your data frame includes: actor1, actor2, assoc_actor_1, assoc_actor_2, sub_event_type, source_scale, source.") +}) + +# Test that you get an error if the actor1 or actor 2 columns have more than one value per row. +test_that("error if the actor1 or actor 2 columns have more than one value per row", { + data <- data.frame(actor1 = c("Actor A; Actor B"), actor2 = c("Actor C"), inter1 = "1", inter2="2",assoc_actor_1 = c("Actor D"), assoc_actor_2 = c(""), sub_event_type = "Protests", source_scale= "", source = c("Source A; Source B")) + expect_error(acled_transform_longer(data, "main_actors"), "column seems to include more than one result per row. 
That is inconsistent with our column structure.") +}) + +test_that("warning when there are empty rows in the assoc actors column", { + data <- data.frame(actor1 = c("Actor D"), actor2 = c("Actor C"), inter1 = "1", inter2="2",assoc_actor_1 = c("Actor A; Actor B"), assoc_actor_2 = c(""), sub_event_type = "Protests", source_scale= "",source= '') + expect_warning(acled_transform_longer(data, "assoc_actors"), "There are empty rows in the assoc_actor column.") + expect_warning(acled_transform_longer(data, "full_actors"), "There are empty rows in the actor column.") +}) diff --git a/tests/testthat/test-acled_transform_wider.R b/tests/testthat/test-acled_transform_wider.R new file mode 100644 index 0000000..dce3b2a --- /dev/null +++ b/tests/testthat/test-acled_transform_wider.R @@ -0,0 +1,109 @@ +# Tests for proper functioning of the function (Can they return the equivalent of what is acled_transform_longer input)---- + +test_that("acled_transform_wider returns expected results for type = 'full_actors'", { + + # Define a simple acledR::acled_old_dummy frame + + # Transform acledR::acled_old_dummy from wide to long format + transformed_data <- acled_transform_longer(acledR::acled_old_dummy, type = "full_actors") + + # Transform acledR::acled_old_dummy from long to wide format + reversed_data <- acled_transform_wider(transformed_data, type = "full_actors") %>% + mutate(actor2 = na_if(actor2, "")) + + # Test if the original acledR::acled_old_dummy and the reversed acledR::acled_old_dummy are the same + expect_equal(dplyr::arrange(acledR::acled_old_dummy,event_id_cnty), dplyr::arrange(reversed_data, event_id_cnty)) +}) + +test_that("acled_transform_wider returns expected results for type = 'main_actors'", { + + # Define a simple acledR::acled_old_dummy frame + + # Transform acledR::acled_old_dummy from wide to long format + transformed_data <- acled_transform_longer(acledR::acled_old_dummy, type = "main_actors") + + # Transform acledR::acled_old_dummy from long to wide format 
+ reversed_data <- acled_transform_wider(transformed_data, type = "main_actors") + + # Test if the original acledR::acled_old_dummy and the reversed acledR::acled_old_dummy are the same + expect_equal(dplyr::arrange(acledR::acled_old_dummy,event_id_cnty), dplyr::arrange(reversed_data, event_id_cnty)) +}) + +test_that("acled_transform_wider returns expected results for type = 'assoc_actors'", { + + # Transform acledR::acled_old_dummy from wide to long format + transformed_data <- acled_transform_longer(acledR::acled_old_dummy, type = "assoc_actors") + + # Transform acledR::acled_old_dummy from long to wide format + reversed_data <- acled_transform_wider(transformed_data, type = "assoc_actors") + + # Test if the original acledR::acled_old_dummy and the reversed acledR::acled_old_dummy are the same + expect_equal(dplyr::arrange(acledR::acled_old_dummy,event_id_cnty), dplyr::arrange(reversed_data, event_id_cnty)) +}) + +test_that("acled_transform_wider returns expected results for type = 'source'", { + + # Transform acledR::acled_old_dummy from wide to long format + transformed_data <- acled_transform_longer(acledR::acled_old_dummy, type = "source") + + # Transform acledR::acled_old_dummy from long to wide format + reversed_data <- acled_transform_wider(transformed_data, type = "source") + + # Test if the original acledR::acled_old_dummy and the reversed acledR::acled_old_dummy are the same + expect_equal(dplyr::arrange(acledR::acled_old_dummy,event_id_cnty), dplyr::arrange(reversed_data, event_id_cnty)) +}) + +# Tests for proper errors and messages---- + +## Test if function returns an error when a data frame with missing necessary columns is input---- +test_that("Function returns error with missing columns", { + + df <- data.frame(a = c(1, 2, 3), b = c("a", "b", "c")) + + expect_error(acled_transform_wider(df, "full_actors"), + "Some columns are missing. 
Please make sure your data frame includes: actor,type_of_actor,inter_type, and inter.") + + expect_error(acled_transform_wider(df, "main_actors"), + "Some columns are missing. Please make sure your data frame includes: actor,type_of_actor,inter_type, and inter.") + + expect_error(acled_transform_wider(df, "assoc_actors"), + "Some columns are missing. Please make sure your data frame includes: assoc_actor,type_of_assoc_actor.") + + expect_error(acled_transform_wider(df, "source"), + "Some columns are missing. Please make sure your data frame includes: source") +}) + +## Test if function returns an error when a non-existent type is input---- +test_that("Function returns NULL when non-existent type is input", { + + df <- data.frame(actor = c("a", "b"), type_of_actor = c(1, 2), inter_type = c(1, 2), inter = c(1, 2)) + + expect_error(acled_transform_wider(df, type = "non_existent_type"), regexp = "is not a valid option.") +}) + + + +# Weird cases. ---- + +## Test that if you request monadic data from the API, you can convert it back. ---- + +test_that("Can you request data from the monadic api, and convert it back?", { + + tester <- acledR::acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + country = "Brazil", start_date="2022-01-01", + end_date = "2022-12-31", monadic = T, + prompt = F, acled_access = F, log = F)%>% + acled_transform_wider(type = "api_monadic") + + control <- acledR::acled_api(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + country = "Brazil", start_date="2022-01-01", + end_date = "2022-12-31", monadic = F, + prompt = F, acled_access = F, log = F)%>% + arrange(desc(event_id_cnty)) + + + expect_equal(tester, control) + +}) + +## Test that if you use the function with some additional transformations made to the dataset, you can transform back without losing any data. 
---- diff --git a/tests/testthat/test-acled_update.R b/tests/testthat/test-acled_update.R new file mode 100644 index 0000000..55428e2 --- /dev/null +++ b/tests/testthat/test-acled_update.R @@ -0,0 +1,120 @@ + + +#Proper functioning of the function ---- + +## There are no duplicates in the returned dataset ---- + +test_that("There are no duplicates in the returned data",{ + expect_equal(anyDuplicated(dupes_checks$event_id_cnty), 0L) +}) + +## Additional additional_countries are properly added to the dataset + +test_that("Additional additional_countries are properly included",{ + expect_equal( + append(unique(dupes_checks$country), c("Mexico", "Brazil")), unique(dupes_checks_plus_bramex$country)) +}) + +## When disabling deleted events, users get a different result ---- + +test_that("acled_update without deletions actually returns events that should be deleted",{ + + snap_test <- acled_update(acledR::acled_old_deletion_dummy, + email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), deleted = FALSE, + acled_access = F, prompts = F) + + expect_true(any(find_deleted_events$event_id_cnty %in% snap_test$event_id_cnty)) +}) + +# Errors and messages ---- +## An warning appears when requesting an update of data for dates earlier/later to the min of my dataset ---- + +test_that("Warning for earlier dates requested", { + expect_warning(acled_update(acledR::acled_old_dummy, + additional_countries = "Argentina", + start_date = (min(acledR::acled_old_dummy$event_date) + 10), + email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + acled_access = F, prompts = F), + regexp = "Start date is later") +}) + +test_that("Warning for start dates that are later than the earliest in the dataset", { + expect_warning(acled_update(acledR::acled_old_dummy, + additional_countries = "Argentina", + start_date = (min(acledR::acled_old_dummy$event_date) - 10), + email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + 
acled_access = F, prompts = F), + regexp = "Start date is earlier than") +}) + +## A warning appears when requesting an update of data for dates older/earlier to the max of my dataset ---- + +test_that("Warning for later end dates than requested", { + expect_warning(acled_update(acledR::acled_old_dummy, + additional_countries = "Argentina", + end_date = (max(acledR::acled_old_dummy$event_date) + 10), + email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + acled_access = F, prompts = F), + regexp = "End date is later than") +}) + +test_that("Warning for ealier end dates than the max requested", { + expect_warning(acled_update(acledR::acled_old_dummy, + additional_countries = "Argentina", + end_date = (max(acledR::acled_old_dummy$event_date) - 10), + email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + acled_access = F, prompts = F), + regexp = "End date is earlier than") +}) + + + +## Errors when additional_countries provided are not in acled_countries---- + +test_that("Error if `additional_countries` or `regions` are not in the dataset", { + + expect_error(acled_update(acledR::acled_old_dummy, additional_countries = "Unknown Country"), + "Error: The following additional_countries are not present in acledR::acled_countries: Unknown Country") + expect_error(acled_update(acledR::acled_old_dummy, regions = "Unknown Region"), + "Error: The following regions are not present in acledR::acled_regions: Unknown Region") + +}) + + +## Errors if acled_access is used incorrectly ---- + +test_that("An error appears if acled_access is false but no keys are provided",{ + expect_error(acled_update(acledR::acled_old_dummy, + additional_countries = "Argentina", + start_date = min(acled_old_dummy$event_date), + acled_access = F, prompts = F), regexp = "Error: If acled_access is FALSE") +}) + +## Errors when the dataset does not have the acled structure ---- + +test_that("If you have a dataset that does not match acled's 
structure you get an error",{ + df <- data.frame( + x="a", y="b", c="c" + ) + expect_error(acled_update(df, + additional_countries = "Argentina", + start_date = "2022-01-01", + acled_access = F, prompts = F), regexp = "The data frame provided does not have ACLED's structure") +}) + +## Errors when requesting event types that are not part of ACLED's event types ---- + +test_that("Users get an error when requesting a non existent event type",{ + expect_error(acled_update(acledR::acled_old_dummy, + email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY"), + event_types = "Snowball fights", + acled_access = F, prompts = F), regexp = "Error: Invalid event_type provided") + }) +# Weird use cases ---- + +## Does not generate duplicates when fed a non-unique list of values. - But it takes a long time ---- + +test_that("No duplicates when the function is given a non-uniques list of values in the additional_countries argument.", { + expect_equal(anyDuplicated(test_more_than_one), 0L) +}) + diff --git a/tests/testthat/testthat-problems.rds b/tests/testthat/testthat-problems.rds new file mode 100644 index 0000000..ab8236a Binary files /dev/null and b/tests/testthat/testthat-problems.rds differ diff --git a/vignettes/.gitignore b/vignettes/.gitignore new file mode 100644 index 0000000..097b241 --- /dev/null +++ b/vignettes/.gitignore @@ -0,0 +1,2 @@ +*.html +*.R diff --git a/vignettes/articles/acled_api.Rmd b/vignettes/articles/acled_api.Rmd new file mode 100644 index 0000000..7dd1f4b --- /dev/null +++ b/vignettes/articles/acled_api.Rmd @@ -0,0 +1,219 @@ +--- +title: "Accessing and Utilizing ACLED's API" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Accessing and Utilizing ACLED's API} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + warning = F, + message = F, + cache = F +) +``` + +## Accessing the API + +To use ACLED's 
[API](https://apidocs.acleddata.com/), you must first register an account in [ACLED's Access Portal](https://developer.acleddata.com/). You can find more information about registering your account by visiting ACLED’s [access guide](https://acleddata.com/download/35300/). To store and utilize your registered credentials (your email and a unique key), you can either: + +- (recommended) Use `acled_access()` to verify your credentials and store them in your local environment for a given session. + +- Manually store your credentials for permanent use across all sessions. + +You can store your credentials using `acled_access()` by doing the following: + +```{r, eval = F} +acled_access("your_email", "your_key") # to be run every session before doing an API call. +``` + +Alternatively, to manually store your credentials as a variable in the R environment, you can run the following: + +```{r, eval = F} +file.edit(file.path("~", ".Renviron")) +``` + +which will open your `.Renviron` file. Once open, you can set: + +```{r, eval = F} +email_address = "your_email" +acled_key = "your_key" +``` + +Afterwards, you should save the file and restart your R session, because `.Renviron` is only read when R starts. + +You can confirm that they have been properly stored by running `Sys.getenv("email_address")` to return the stored email address and `Sys.getenv("acled_key")` to return the stored ACLED key in the console. + +## ACLED API + +`acled_api()` is a function you can use to request and process ACLED API calls. The function takes the following arguments: + +```{r, eval = F} +acled_api(email = NULL, + key = NULL, + country = NULL, + regions = NULL, + start_date = "1997-01-01", + end_date = Sys.Date(), + timestamp = NULL, + event_types = NULL, + population = "none", + monadic = FALSE, + ..., + acled_access = TRUE, + prompt = TRUE) + +``` + +## Parameters for the API + +### Geographical filters + +You can use the `country` and `regions` parameters to specify the locations from which you would like to request data. 
If both values are `NULL` or are not included, the API will return data for all countries and regions. If you would like to request data for multiple countries, you can do so by using a vector of country names (e.g., `c("Argentina","Spain","Bolivia")`). Similarly, you can request data from one or more regions by using either a vector of region names or numeric codes. `acledR::acled_countries` and `acledR::acled_regions` show the full lists of countries and regions available. Please visit ACLED’s [Knowledge Base](https://acleddata.com/article-categories/region-specific-methodology/) for region-specific methodology questions. + +### Temporal filters + +You can specify the date range for which you would like to receive data by using the `start_date` and `end_date` parameters, both of which require data in the "yyyy-mm-dd" format. + +You can use the `timestamp` parameter to select data that were added or updated over a specific time period. Please keep in mind that `timestamp` indicates when the event was added or modified in ACLED’s dataset, meaning that an event that occurred far in the past (i.e., with an old *event date*) may still have a recent timestamp if it was recently updated. + +In practice, the `timestamp` parameter is typically not used for analysis but is instead used to keep your own dataset up to date as changes are made to ACLED’s data. To learn more about how to keep your datasets up to date, visit the [Keeping your datasets up to date](https://acled.github.io/acledR/articles/acled_update.html) page for an acledR approach or this [guide](https://acleddata.com/download/35179/) more relevant to Excel or other spreadsheet tools. + +### Additional filters + +You can also use the `event_types` argument to filter to specific *event_types* in ACLED data. To do so, you should enter the *event_type* of interest as a string or as a vector of strings (e.g., `event_types = "Battles"` or `event_types = c("Battles", "Protests")`). 
For a description of all available *event_types* in ACLED’s dataset, please refer to [ACLED’s codebook](https://acleddata.com/download/2827/). + +ACLED data defaults to a wide (or dyadic) format, where each row contains multiple actor columns, with those actors interacting during the event. However, you can request a long (or monadic) format using the `monadic` argument. By default, this argument is `FALSE`, meaning you will receive a dyadic version of the data. When `monadic=TRUE`, the function will return a monadic ("long-form") data frame with only one actor per row (based on *actor1* and *actor2*). For transforming your dataset from wide to long without utilizing the API, or transforming it based on different sets of columns, visit `acled_transform_longer()`. For more information on the difference between our wide/dyadic and monadic/long datasets, please visit our [API guide](https://apidocs.acleddata.com/acled_endpoint.html#dyadic-versus-monadic-formats---export_type). + +Finally, you can use the `population` argument to specify if you want to include the estimated affected population columns. This argument takes three options, `none` which returns no extra columns, `best` which only returns the population_best column, or `full` which returns all the estimated population columns. For more information, visit our [Conflict Exposure piece](https://acleddata.com/conflict-exposure/). + +The `...` parameter represents any other arguments you might want to include in your API query, such as *ISO* or *Interaction*. If you want to use these filters or others not included in the list of parameters described above, then you can write them as `&parameter=value`. For instance, you might wish to include `&iso=4` at the end of the function. You can visit ACLED’s [API guide](https://apidocs.acleddata.com/acled_endpoint.html) to learn more about other valid parameters. 
+ + +### Function options + +You can exclude the email and key parameters within the function when `acled_access()` has been used beforehand or the credentials are stored as environment variables. Alternatively, you can set `acled_access` to `FALSE` and then manually include your email and key as arguments in the function. + +You can use the `prompt` argument to specify how the function handles API calls that return large amounts of data. If `prompt=TRUE`, then you will receive an interactive prompt (see `Handling big API calls` immediately below). If you do not want this interactive prompt (e.g., because `acled_api()` is part of a routine script), then you can set `prompt = FALSE`. + +## Handling big API calls + +As is common when executing API calls, handling large volumes of data requires some special consideration. In ACLED’s case, the base API uses pagination to address some of these issues, but pagination can be confusing for newer users (see our [API guide](https://apidocs.acleddata.com/generalities_section.html#file-size-limits-pagination) for a more detailed explanation). Fortunately, this package avoids this issue. Instead of manual pagination, the `acled_api()` function splits the call automatically. + +`acled_api()` will first estimate how much data you are requesting. You will then be prompted with a message which includes the following: + +- The number of countries for which data is being requested, + +- The number of estimated events requested (based only on country and year, and NOT event type), + +- The number of API calls needed, based on an estimate of how big the call is, + +- A question asking whether given this information and the number of available API calls linked to your account – you would like to proceed with your API call. + +## Example - Gathering data with `acled_api()` + +Imagine you are interested in events from “Brazil” occurring between January 1st, 2022, and December 1st, 2022. 
+ +```{r, eval = FALSE} +library(acledR) +library(dplyr) + +#Note: This is simply an example–you will need to include your own credentials rather than the email and key placeholders that are included below. + +acled_access(email = "your_email", key = "your_key") + +df_br <- acled_api(country = c("Brazil"), + start_date = "2022-01-01", + end_date = "2022-12-01", + monadic = F, + acled_access = TRUE, + prompt = F) +``` +```{r,echo=FALSE } +library(acledR) +library(dplyr) + +#Note: This is simply an example–you will need to include your own credentials rather than the email and key placeholders that are included below. + +acled_access(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY")) + +df_br <- acled_api(country = c("Brazil"), + start_date = "2022-01-01", + end_date = "2022-12-01", + monadic = F, + acled_access = TRUE, + prompt = F) +``` + + + +This returns a tibble that includes each ACLED event in “Brazil” during the specified period: + +```{r} +head(df_br, 5) +``` + +If you wanted data from both “Brazil” and “Colombia”, you would execute the following: + +```{r, eval = F} +df_br_co <- acled_api(country = c("Brazil", "Colombia"), + start_date = "2022-01-01", + end_date = "2022-12-01", + monadic = F, + acled_access = TRUE, + prompt = F) +``` + +If you are interested in events occurring over a larger area, it may be simpler to omit the `country` parameter and include a `regions` argument instead. You could also include an `event_type` argument to receive only a specific type of event: + +```{r, eval = F} +df_sa <- acled_api(regions = c("South America"), + start_date = "2022-01-01", + end_date = "2022-12-01", + event_type = "Protests", + monadic = F, + acled_access = TRUE, + prompt = F) +``` + +You can use the *timestamp* column/filter to specify the dates from which you would like to receive new or updated data. 
You can include the argument as either a string ("yyyy-mm-dd") or a numeric Unix timestamp: + +```{r, eval = F} +df_br_co <- acled_api(country = c("Brazil", "Colombia"), + start_date = "2022-01-01", + end_date = "2022-12-01", + monadic = F, + # timestamp = "2022-01-24" -> in the case of string + timestamp = 1643056974, # -> in the case of a numeric Unix timestamp + acled_access = TRUE, + prompt = F) +``` + +If you would like to include only one type of *interaction* (e.g., "Rioters versus Civilians (57)"), then you can add *interaction* code to the `...` argument: + +```{r, eval = F} +df_sa <- acled_api(country = c("Brazil", "Colombia"), + start_date = "2022-01-01", + end_date = "2022-12-01", + monadic = F, + ... = "&interaction=57", + acled_access = TRUE, + prompt = F) +``` + +You could also request the monadic version of the data by setting `monadic = TRUE`: + +```{r} +df_sa_monadic <- acled_api(regions = c("South America"), + start_date = "2022-01-01", + end_date = "2022-12-01", + monadic = T, + acled_access = TRUE, + prompt = F) +``` + +Best of luck! diff --git a/vignettes/articles/acled_transformations.Rmd b/vignettes/articles/acled_transformations.Rmd new file mode 100644 index 0000000..aa7e41f --- /dev/null +++ b/vignettes/articles/acled_transformations.Rmd @@ -0,0 +1,188 @@ +--- +title: "Transformation of ACLED data" +output: html_document +date: "2022-11-11" +vignette: > + %\VignetteIndexEntry{Transformation of ACLED data} + %\VignetteEncoding{UTF-8} + %\VignetteEngine{knitr::rmarkdown} +--- +```{r loading packs, echo = F, message=FALSE, warning=FALSE} +library(kableExtra) +library(knitr) +library(acledR) +library(dplyr) +``` + +The ACLED dataset is designed with user readability in mind. At times, this focus might conflict with standard clean data principles, such as having only a single value per column and row. 
To circumvent these issues, and to make it easier to use ACLED data in certain programmatic settings, you can use the data manipulation suite of functions in the `acledR` package. Currently, ACLED has three available functions: `acled_transform_interaction`, `acled_transform_longer` and `acled_transform_wider`. + +## 1. Switch between numeric and string interaction codes - `acled_transform_interaction()` + +The first function in this suite, `acled_transform_interaction()`, allows you to easily transition from numeric interaction codes to a text description of the interaction code. + +In our analyses, we often refer to actor types by using text categories (e.g. *State Forces* or *Rebel Groups*), while our dataset structures these categories using a numeric categorization. You can find more information – including a table of which actor categories correspond to which numeric codes – in ACLED’s [codebook](https://acleddata.com/download/2827/). + +This function allows you to convert your numeric codes into text descriptions, without the time-consuming need of writing out these changes yourself. + +```{r, eval = FALSE} +acled_transform_interaction(df, + only_inters = F) + +``` + +The function requires two arguments: + +* `data`: An ACLED dataset which includes the inter1 and inter2 variables (when `only_inters = F`). + +* `only_inters`: Boolean option on whether to include only *inter1* and *inter2*, without including *interaction*. This option defaults to `FALSE`, thus including the *interaction* column. + +The function simply returns a modified dataframe with the swapped inter & interaction formats. In the *interaction* column, you will find the actor types separated by “-”, for example: + +```{r} +acledR::acled_old_dummy[39:40,] %>% + # Displaying only relevant columns + select(event_id_cnty, inter1, inter2, interaction) + +``` + +... will change to ... 
+ +```{r} +acledR::acled_old_dummy[39:40,] %>% + acled_transform_interaction()%>% + select(event_id_cnty, inter1, inter2, interaction)%>% + head(2) +``` + + +## 2. From wide to long formats - `acled_transform_longer()` + + +`acled_transform_longer()` allows you to switch from a wide to a long format without the need to make a new API call. Typical ACLED data is in a wide format, with multiple actors represented in each row (see our [API interactive guide](https://acled.github.io/ACLED-api-guide/acled_endpoint.html#dyadic-versus-monadic-formats---export_type) for a more detailed explanation). This format generally works well if you are interested in conducting event-based analyses. Still, there are times when you may wish to conduct actor-based analyses that are better suited to a long data format where each actor has a separate row, and a single event may therefore be represented in multiple rows. + +Note that wide and long formats are generic terms that are more specifically referred to as dyadic and monadic data types in other ACLED documentation (see [ACLED endpoint guide](https://acled.github.io/ACLED-api-guide/acled_endpoint.html)). + +```{r setup, eval = FALSE} + +acled_transform_longer(data, + type = "full_actors") + +``` + +`acled_transform_longer()` requires two arguments: + +* `data`: A wide format ACLED dataset. + +* `type`: A character vector indicating which columns to transpose (i.e. the columns that go from wide to long format). + +The available column options upon which ACLED data can be transposed are: + +1. `full_actors`: Transposes all the actor columns in the dataset (*actor1*, *actor2*, *assoc_actor_1*, *assoc_actor_2*). There will be a separate row for each actor or associate actor involved in each event. This generates four new columns: `type_of_actor` and `actor`, and `inter_type` and `inter`. `type_of_actor` denotes the original column in which the actor was found (i.e. 
*actor1*, *actor2*, *assoc_actor_1*, *assoc_actor_2*), with the `actor` column simply being the actor's name. Similarly, `inter` is the actor’s inter code, with `inter_type` denoting whether the code came from the *inter1* or *inter2* column. + +2. `main_actors`: Transposes only *actor1* and *actor2*. There will be separate rows for main actors only. This generates two new columns: `type_of_actor` and `actor`. `type_of_actor` denotes the column in which the actor was originally found, while `actor` is simply the name of the actor. + +3. `assoc_actors`: Transposes only *assoc_actor_1* and *assoc_actor_2* columns. There will be separate rows for associate actors only. This generates two new columns: `type_of_actor` and `actor`. `type_of_actor` denotes whether the actor was originally found in the *assoc_actor_1* or *assoc_actor_2* column, while `actor` is simply the name of the associate actor. **Note:** The data will still include *actor1* and *actor2* columns. + +4. `source`: Transposes only the *source* column. There will be a separate row for each source in the *source* column. + +Keep in mind that you can receive some data in monadic/longer form directly from ACLED’s API, but using this function instead can provide some added benefits. Specifically: + +- You can use this function to transform a dyadic/wide dataset to a monadic/long dataset, thus receiving the latter without executing an additional API call. + +- You have more control over the columns used when transforming your dataset from wide to long format. This function allows you to transpose on the following columns: *actor1* & *actor2*, *assoc_actor_1*, *assoc_actor_2*, and *source*. The API only allows you to receive long-format data based on *actor1*, *actor2*, *assoc_actor_1*, *assoc_actor_2*, without an option to control if you want some of these columns or all of them. 
+ + + +## 3. From long to wide format - `acled_transform_wider()` + +`acledR` also offers the inverse of `acled_transform_longer()`, allowing you to pivot your dataframe back to a wider format (dyadic form). The function is meant to aid users that may have used `acled_transform_longer()` and would like to return the dataframe to its original state. + +The function is similar to its counterpart: +```{r, eval = F} + +acled_transform_wider(data, + type = "full_actors") + +``` + +As you can see, the arguments mirror those of `acled_transform_longer()`: + +* `data`: A long format ACLED dataset (e.g. the output of `acled_transform_longer()`). + +* `type`: A character vector indicating which columns to transpose (the columns that go from long to wide format). + + + +## Example + +In this section you can walk through a potential use case for the transformation functions. + +For this example, assume that you are interested in data from “South America” during the first half of 2023. **NOTE**: The email and key values below are only examples. You should provide your own credentials that you can create by using [ACLED’s website](https://apidocs.acleddata.com/get_started.html#getting-your-api-key). 
+ +```{r, eval = FALSE} + +library(acledR) + +acled_access(email = "your_email", key = "your_key") + +df_sa <- acled_api(regions = "South America", + start_date = "2023-01-01", + end_date = "2023-06-01", + monadic = FALSE, + acled_access = TRUE, + prompt = FALSE) +``` +```{r, echo=FALSE} + +library(acledR) + +acled_access(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY")) + +df_sa <- acled_api(regions = "South America", + start_date = "2023-01-01", + end_date = "2023-06-01", + monadic = FALSE, + acled_access = TRUE, + prompt = FALSE) +``` +The data are in ACLED's default wide format, so an actor can appear in any of the four actor columns. To retain only those events involving the "Military Forces of Colombia (2022-)", you can search across all four columns at once: + +```{r} + +# fixed() makes stringr match the actor name literally; read as a regular expression, "(2022-)" would be a group and the parentheses in the name would never match. +mil_colombia <- df_sa %>% + filter(stringr::str_detect(paste(actor1, actor2, assoc_actor_1, assoc_actor_2, sep = ";"), stringr::fixed("Military Forces of Colombia (2022-)"))) + +``` + +In the filtered events there are `r nrow(mil_colombia)` rows, meaning there were `r nrow(mil_colombia)` events where the "Military Forces of Colombia (2022-)" were involved as an actor or associate actor. + +Instead of filtering to the events involving a particular actor, you may wish to calculate the number of events in which each actor in the dataset participates. The issue is that an actor may be represented in any of the four actor columns, so you cannot simply sum the number of rows in which an actor appears in one particular column. A simple solution is to transform the dataset into long form and then calculate event counts for each actor. You can begin by using the `acled_transform_longer()` function: + +```{r} + +df_sa_long <- acled_transform_longer(df_sa, type = "full_actors") + +``` + +The dataset is now in long form with each row representing a single actor in a single event. You can now count the number of events for each actor, but only by counting **unique** *event_id_cnty* values. 
It is very important to count rows by unique identifiers because when transforming data to long format, events can be represented in multiple rows equal to the number of actors involved in that event. + +```{r} +library(tidyr) +library(dplyr) + +actors_df_sa <- df_sa_long %>% + group_by(actor) %>% + summarise(n_events = n_distinct(unique(event_id_cnty))) + +``` + +To verify your results, you can filter actor counts to only "Military Forces of Colombia (2022-)". + +```{r} + +actors_df_sa %>% + filter(actor == "Military Forces of Colombia (2022-)") %>% + .$n_events +``` + +The number of events matches the number of rows you got when first filtering by actor. diff --git a/vignettes/articles/acled_update.Rmd b/vignettes/articles/acled_update.Rmd new file mode 100644 index 0000000..2ce8673 --- /dev/null +++ b/vignettes/articles/acled_update.Rmd @@ -0,0 +1,129 @@ +--- +title: "Keeping your dataset up to date" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Keeping your dataset up to date} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} + +--- +```{r setup, echo=FALSE} +knitr::opts_chunk$set(echo = TRUE) + +defaultW <- getOption("warn") + +options(warn = -1) + +``` + +ACLED’s dataset is a ‘living dataset’, meaning new events are added frequently and existing data can be updated. + +The dataset changes in three ways: + +- New events + - ACLED adds new events weekly. Each event is published with a new and unique *event_id_cnty*. + +- Updates of previously published events + - In some cases, ACLED modifies published events as new information comes to light. For example, an actor may claim responsibility for an attack long after the event takes place, or the number of fatalities may increase or decrease as victims succumb to their injuries or are found alive. When an event is modified, the *event_id_cnty* remains the same but the information is updated with the old information being overwritten, including the *timestamp* field. 
+ +- Deletion of events + - In some cases, ACLED deletes published events. Deletions happen when new information surfaces indicating that the event no longer fits the scope of ACLED's dataset, or when new information suggests that two separate events are actually duplicate reports of a single event. When deletions occur, the event is removed from the dataset along with its unique *event_id_cnty* value. + +For users with an ACLED dataset saved locally on their computer, ACLED suggests that you regularly check for deleted or updated events to ensure your dataset is up to date. In this section you will learn how to keep your dataset updated by using the `acledR` package. + +## Keeping track of updates - `acled_update()` + +As detailed in ACLED’s guide about [updating your dataset](https://acleddata.com/download/35179/), in some cases events are updated or deleted, necessitating an update of your downloaded dataset. + +Unlike for [deleted events](https://acled.github.io/ACLED-api-guide/deleted_endpoint.html), there is no separate API endpoint to check for updated events. When events are updated, their timestamp changes to reflect the timing of the most recent change. This means that you can find the updated events by using `acledR::acled_api()` while providing the most recent timestamp in your local dataset (i.e. `max({your ACLED dataset}$timestamp)`) as the `timestamp` argument of the function. If there is an event with a more recent *timestamp* but the same *event_id_cnty* as an event in your downloaded dataset, then that event has been modified. Hence, you can remove the duplicated event with the smaller timestamp value. 
+ +To simplify this process, `acledR` includes a function which makes the update for you by following the steps previously explained: + +```{r, eval=F} +acled_update( + df, + start_date = min(df$event_date), + end_date = max(df$event_date), + additional_countries = "current countries", + regions = NULL, + event_types = NULL, + acled_access = TRUE, + email = NULL, + key = NULL, + deleted = TRUE, + prompts = TRUE) +``` + +The function has the following arguments: + +- `df`: The dataframe to update. It has to have the same structure as ACLED's dyadic dataframe (i.e. the default result of `acled_api()`). + +- `start_date`: The first date of events you want to update from. This is the floor of *event_date*, not of *timestamp*. For example, `start_date = "2023-06-01"` will update every event where the *event_date* is on or after 2023-06-01. + +- `end_date`: The last date of events you want to update from. This is the ceiling of *event_date*, not of *timestamp*. For example, `end_date = "2023-06-06"` will update every event where the *event_date* is on or before 2023-06-06. `start_date` and `end_date` default to the min and max *event_date* in your dataset, respectively. + +- `additional_countries`: Additional countries to add to your dataset. It defaults to “current countries”, which includes all the countries inside your dataset. + +- `regions`: The regions for which you would like events in your dataset updated. + +- `event_types`: The event types for which you would like events in your dataset updated. + +- `acled_access`: If you have already used `acled_access()`, you can set this option as TRUE (default) to avoid having to input your email and access key. + +- `email`: The email you have registered in [ACLED's Access Portal](https://developer.acleddata.com/). This argument is not required if `acled_access = TRUE`. + +- `key`: The key you have registered in [ACLED's Access Portal](https://developer.acleddata.com/). 
This argument is not required if `acled_access = TRUE`. + +- `deleted`: If TRUE, in addition to updating the information in updated events, this function will also remove deleted events from your dataset by using [ACLED API's deleted endpoint](https://acled.github.io/acledR/articles/acled_deletions_api.html). + +- `prompts`: If TRUE (default), you will receive interactive prompts about your call before it is executed; set it to FALSE to suppress them (e.g. in a routine script). See `acled_api()`. + +## Examples + +In this section you can learn to use `acled_update` to keep your datasets updated. + +Load your downloaded dataset: + +```{r, eval = T, message=FALSE} +library(acledR) +library(lubridate) +library(dplyr) +``` + +```{r, eval=FALSE} +acled_access(email = "your_email", key = "your_key") # This is an example, you will need to input your credentials. + +argen_dummy_acled_file <- acledR::acled_old_dummy # Here is our old personal ACLED dataset +``` +```{r, echo=FALSE} +acled_access(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY")) # This is an example, you will need to input your credentials. + +argen_dummy_acled_file <- acledR::acled_old_dummy # Here is our old personal ACLED dataset +``` + +When was the last time you downloaded or updated your dataset? +```{r} +latest_timestamp_unix <- max(argen_dummy_acled_file$timestamp) + +latest_timestamp <- as_datetime(latest_timestamp_unix) +``` + +The dataset has not been updated since `r as_date(latest_timestamp)`, so you may want a more updated version. To do so, you can use `acled_update()`. If you are only interested in updating events that are already in your dataset, you can ignore the `start_date` and `end_date` arguments. Deleted events are also removed from your dataset as long as you keep `deleted = TRUE` (the default). + +```{r} + +new_argen_dataset <- acled_update(argen_dummy_acled_file, + additional_countries = "Argentina", + acled_access = T, + prompts = FALSE) +``` + +Now your dataset captures modified and newly created events. + +Best of luck! 
+ +```{r, echo=F} +options(warn = defaultW) +``` + + diff --git a/vignettes/get_started.Rmd b/vignettes/get_started.Rmd new file mode 100644 index 0000000..30a27ea --- /dev/null +++ b/vignettes/get_started.Rmd @@ -0,0 +1,136 @@ +--- +title: "Get Started" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Get Started} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +knitr::include_graphics("workflow.png") +``` + +``` {r, echo=FALSE,include = FALSE} +library(acledR) +library(dplyr) +``` + +Welcome! + +In this vignette you will receive a brief explanation of how to use ACLED’s R package. Besides this initial walkthrough, you can find more detailed explanations and examples under the 'Utilizing acledR' tab. Alternatively, for information relating to ACLED’s methodology, please visit [ACLED's Knowledge Base](https://acleddata.com/knowledge-base/). + +The main objectives of this package are (i) to facilitate access to ACLED data in R by providing a wrapper to submit GET requests to ACLED’s API, and (ii) to simplify the manipulation of ACLED data. + +The general workflow for which this package was designed, and therefore the layout of this vignette, is as follows: + +![Authenticate your API credentials. Request data from ACLED API. Transform data received from the API. Generate initial analysis from the data.](workflow.png) + +You can begin by installing the package: + +```{r, eval=FALSE} + +# Installing the package +install.packages("acledR") # To install the package from CRAN + +devtools::install_github("ACLED/acledR") # To install the developer's branch from Github. + +# Loading the package +library(acledR) +``` + +## Authenticating your credentials - `acled_access()` + +To be able to access the API, you require an API key. If you are not yet registered in ACLED's Access Portal, you can register [here](https://developer.acleddata.com/). 
If you would like a detailed guide on how to create your account and get your API key, please see [ACLED's Access Guide](https://acleddata.com/download/35300/). + +Once you are registered, we encourage you to authenticate your credentials by using the `acled_access()` function. This function allows you to test that your credentials are working as intended and save your credentials in your R environment, thus allowing you to avoid the need to manually input your credentials during each data request. + +```{r, eval=FALSE} +acled_access(email = "your_email", key = "your_key") # This is an example, you will need to input your credentials. +``` + +```{r,echo=FALSE} +acled_access(email = Sys.getenv("EMAIL_ADDRESS_EXAMPLES"), key = Sys.getenv("EXAMPLES_KEY")) +``` + + + +If the authentication was successful, you will find a message in the console stating "Authorization accepted". + +## Requesting data from ACLED’s API - `acled_api()` + +Once your credentials are authenticated, you can start requesting data from the API using the `acled_api()` function. The function accepts several fields used to filter data and specify data formatting. For example, you could request data from Argentina in 2022 using the following code chunk to receive a dataframe: + +```{r} +argentinian_data <- acled_api(country = "Argentina", start_date="2022-01-01",end_date = "2022-12-31", prompt=F) +``` + +Note that while this example uses only three arguments (`country`, `start_date` and `end_date`), `acled_api()` accepts several other useful arguments, descriptions of which can be found in `vignette("acled_api")`. Further note that if you are not using the `acled_access` function to store your credentials, you would also need to specify your email and key when making API requests. + + +Finally, `acled_api()` allows you to request large amounts of data from the API, potentially requiring the subsetting of your requests into multiple smaller requests. 
You can find more about how `acled_api()` subsets data by visiting `vignette("acled_api")`. + +```{r, echo=F} +glimpse(argentinian_data) +``` + + +## Updating ACLED Data - `acled_update()` + +ACLED data are regularly updated. You can ensure that your own dataset remains current by using the `acled_update()` function. `acled_update()` is designed to handle the intricacies of updating your ACLED dataset, accounting for new events, modifications to existing events, and deletions. + +To update your dataset you need only to provide the `acled_update` function with a dataframe of your old dataset. Note that there are other options providing more control over how your dataset is updated, more information for which can be found in the `vignette("acled_update")`. + +Here is an example of how you can use `acled_update()` to update an old dataset: + +```{r} +new_data <- acled_update(acledR::acled_old_deletion_dummy, prompts = F) +``` + +## Transforming ACLED Data - `acled_transform_*` + +ACLED data has a unique structure which can make data manipulation non-trivial. The `acledR` package provides a suite of functions to simplify data manipulation. You can find a more in-depth treatment of ACLED’s data transformation functions by visiting the `vignette("acled_transformations")`. + +### 1. Converting Interaction Codes - `acled_transform_interaction()` + +The `acled_transform_interaction()` function allows you to convert between numeric and text interaction codes, facilitating easier interpretation and analysis of ACLED data. The function requires your ACLED dataset and an optional boolean argument `only_inters`, which determines whether to include only `inter1` and `inter2` columns or also the `interaction` column in the output. By default, `only_inters` is set to `FALSE`. + +```{r} +transformed_data <- acled_transform_interaction(argentinian_data) + +# Note the inter1 and inter2 columns +head(transformed_data) +``` + +### 2. 
Reshaping Data: Wide to Long Format - `acled_transform_longer()` + +The `acled_transform_longer()` function transforms ACLED data from a wide format, where multiple actors are represented in each row, to a long format, with separate rows for each actor. This is particularly useful for actor-based analyses. + +```{r} +long_data <- acled_transform_longer(argentinian_data, type = "full_actors") + +head(long_data) +``` + +You can specify the type of transformation using the type argument, choosing from `full_actors`, `main_actors`, `assoc_actors`, or `source`. For instance, specifying `full_actors` will result in you transforming the data frame such that every actor and associate actor for a given event is represented in a separate row. This function provides flexibility and control over the transformation process, allowing you to tailor the data structure to your specific needs. + +### 3. Reshaping Data: Long to Wide Format - `acled_transform_wider()` + +Conversely, the `acled_transform_wider()` function enables you to pivot your data back to a wide format. This function may be useful if you used `acled_transform_longer()` and wish to revert to the original data structure. + +```{r} +wide_data <- acled_transform_wider(long_data, type = "full_actors") + +head(wide_data) +``` + +Like its counterpart, this function requires the data and type arguments. + +---- + +Now your dataset should be ready for you to analyze! Remember to always consult [ACLED's Methodology](https://acleddata.com/resources/) when analyzing ACLED data–it should provide a deeper understanding of ACLED data and your analyses. diff --git a/vignettes/workflow.png b/vignettes/workflow.png new file mode 100644 index 0000000..dafb7e5 Binary files /dev/null and b/vignettes/workflow.png differ