diff --git a/.checkov.yml b/.checkov.yml new file mode 100644 index 0000000..2c67e16 --- /dev/null +++ b/.checkov.yml @@ -0,0 +1,11 @@ +directory: + - modules/ + - examples/ +download-external-modules: false # This should ideally be true but there's a lot of findings in the upstream open source modules. +framework: terraform +compact: true +quiet: false +summary-position: bottom + +skip-check: + - CKV_TF_1 # Ensure Terraform module sources use a commit hash // pending https://github.com/hashicorp/terraform/issues/29867 diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..316e7ae --- /dev/null +++ b/.editorconfig @@ -0,0 +1,18 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +indent_size = 2 +indent_style = space +insert_final_newline = true +trim_trailing_whitespace = true +max_line_length = 120 +tab_width = 4 + +[{Makefile,go.mod,go.sum,*.go,.gitmodules}] +indent_style = tab +indent_size = 4 + +[*.md] +trim_trailing_whitespace = false diff --git a/.env b/.env new file mode 100644 index 0000000..95eec7e --- /dev/null +++ b/.env @@ -0,0 +1,3 @@ +BUILD_HARNESS_REPO=ghcr.io/defenseunicorns/build-harness/build-harness +# renovate: datasource=github-tags depName=defenseunicorns/build-harness +BUILD_HARNESS_VERSION=1.14.8 diff --git a/.github/ISSUE_TEMPLATE/general_issue.md b/.github/ISSUE_TEMPLATE/general_issue.md new file mode 100644 index 0000000..d089988 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/general_issue.md @@ -0,0 +1,48 @@ +--- +name: General Issue +about: Suggest a new feature, report a bug, or just ask a question +title: '' +labels: '' +assignees: '' + +--- + + + +### Persona + + + + + +### Description + + + + + +### Use Case + + + + + +### Impact + + + + + +### Completion + + + + + +### Additional Context + + diff --git a/.github/workflows/pr-merge-group.yml b/.github/workflows/pr-merge-group.yml new file mode 100644 index 0000000..e6d8b0e --- /dev/null +++ b/.github/workflows/pr-merge-group.yml @@ -0,0 +1,25 @@ +# 
triggers on merge_group and pull_request events +# only use this if merge queue is enabled, otherwise stick to test-command for e2e testing + +name: pr-merge-group +on: + merge_group: + types: [checks_requested] + pull_request: + +defaults: + run: + shell: bash -eo pipefail {0} + +permissions: + id-token: write # needed for oidc auth for AWS creds + contents: read + +jobs: + pr-merge-group-test: + uses: defenseunicorns/delivery-github-actions-workflows/.github/workflows/pr-merge-group-test.yml@main + secrets: + APPLICATION_ID: ${{ secrets.NARWHAL_BOT_APP_ID }} + APPLICATION_PRIVATE_KEY: ${{ secrets.NARWHAL_BOT_SECRET }} + AWS_COMMERCIAL_ROLE_TO_ASSUME: ${{ secrets.NARWHAL_AWS_COMMERCIAL_ROLE_TO_ASSUME }} + AWS_GOVCLOUD_ROLE_TO_ASSUME: ${{ secrets.NARWHAL_AWS_GOVCLOUD_ROLE_TO_ASSUME }} diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 0000000..419689a --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,20 @@ +# If the workflow trigger is "pull_request", run pre-commit checks. +name: pre-commit + +on: + pull_request: + merge_group: + workflow_dispatch: + + +permissions: + pull-requests: write + id-token: write + contents: read + +jobs: + pre-commit: + uses: defenseunicorns/delivery-github-actions-workflows/.github/workflows/pre-commit.yml@main + secrets: + APPLICATION_ID: ${{ secrets.NARWHAL_BOT_APP_ID }} + APPLICATION_PRIVATE_KEY: ${{ secrets.NARWHAL_BOT_SECRET }} diff --git a/.github/workflows/pull-request-opened-by-renovate.yml b/.github/workflows/pull-request-opened-by-renovate.yml new file mode 100644 index 0000000..24f35bb --- /dev/null +++ b/.github/workflows/pull-request-opened-by-renovate.yml @@ -0,0 +1,40 @@ +# If Renovate is not the author of the PR that triggers this workflow, it will do nothing. +# If Renovate is the author of the PR that triggers this workflow, but the workflow event is anything but "opened", it will do nothing. 
+# If Renovate is the author of the PR that triggers this workflow, and the workflow event is "opened", it will: +# 1. Autoformat using pre-commit and, if necessary, push an additional commit to the PR with the autoformat fixes. +# 2. Change the branch protection rules to turn off the required codeowner approval, because GitHub Apps cannot be codeowners or be added to teams. +# 3. narwhal-bot approves the PR. +# 4. narwhal-bot merges the PR. +# 5. PR is added to merge queue. +# 6. Tests are run. +# a. If tests pass, PR is merged. +# i. If PR is merged, it is closed and branch is deleted. +# b. If tests fail, PR stays open and it is removed from merge queue. +# 7. Branch protection is always set back to the original state. +# +# See ADR #0008. +name: auto-test +on: + pull_request: + # WARNING: DO NOT ADD MORE EVENT TYPES HERE! Because this workflow will push a new commit to the PR in the Autoformat step, adding more event types may cause an infinite loop. + types: + - opened + +permissions: + id-token: write + contents: write + +defaults: + run: + # We need -e -o pipefail for consistency with GitHub Actions' default behavior + shell: bash -e -o pipefail {0} + +jobs: + renovate-test: + if: github.event.client_payload.github.actor == 'renovate[bot]' || github.actor == 'renovate[bot]' + uses: defenseunicorns/delivery-github-actions-workflows/.github/workflows/renovate-test.yml@main + secrets: + APPLICATION_ID: ${{ secrets.NARWHAL_BOT_APP_ID }} + APPLICATION_PRIVATE_KEY: ${{ secrets.NARWHAL_BOT_SECRET }} + AWS_COMMERCIAL_ROLE_TO_ASSUME: ${{ secrets.NARWHAL_AWS_COMMERCIAL_ROLE_TO_ASSUME }} + AWS_GOVCLOUD_ROLE_TO_ASSUME: ${{ secrets.NARWHAL_AWS_GOVCLOUD_ROLE_TO_ASSUME }} diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml new file mode 100644 index 0000000..ec4959d --- /dev/null +++ b/.github/workflows/release-please.yml @@ -0,0 +1,19 @@ +# On every push to main, run release-please to automatically handle the release process. 
+ +name: release-please + +on: + push: + branches: + - main + +permissions: + contents: write + pull-requests: write + +jobs: + release-please: + uses: defenseunicorns/delivery-github-actions-workflows/.github/workflows/release-please.yml@main + secrets: + APPLICATION_ID: ${{ secrets.NARWHAL_BOT_APP_ID }} + APPLICATION_PRIVATE_KEY: ${{ secrets.NARWHAL_BOT_SECRET }} diff --git a/.github/workflows/repo-config.yml b/.github/workflows/repo-config.yml new file mode 100644 index 0000000..5f5712d --- /dev/null +++ b/.github/workflows/repo-config.yml @@ -0,0 +1,19 @@ +name: repo-config + +on: + schedule: + # daily at 11:00 UTC + - cron: '0 11 * * *' + +jobs: + repo-config: + uses: defenseunicorns/delivery-github-actions-workflows/.github/workflows/repo-config.yml@main + secrets: + APPLICATION_ID: ${{ secrets.NARWHAL_BOT_APP_ID }} + APPLICATION_PRIVATE_KEY: ${{ secrets.NARWHAL_BOT_SECRET }} + with: + branch: main + checks: |- + checks: + - context: 'e2e-tests' + - context: 'pre-commit-checks' diff --git a/.github/workflows/scheduled-e2e-secure-test.yml b/.github/workflows/scheduled-e2e-secure-test.yml new file mode 100644 index 0000000..98386bc --- /dev/null +++ b/.github/workflows/scheduled-e2e-secure-test.yml @@ -0,0 +1,23 @@ +name: scheduled-e2e-secure-test + +on: + schedule: + # weekly on Mondays at 12:00 UTC + - cron: '0 12 * * 1' + +defaults: + run: + shell: bash -eo pipefail {0} + +permissions: + id-token: write # needed for oidc auth for AWS creds + contents: read + +jobs: + scheduled-e2e-secure-test: + uses: defenseunicorns/delivery-github-actions-workflows/.github/workflows/secure-test-with-chatops.yml@main + secrets: + APPLICATION_ID: ${{ secrets.NARWHAL_BOT_APP_ID }} + APPLICATION_PRIVATE_KEY: ${{ secrets.NARWHAL_BOT_SECRET }} + AWS_GOVCLOUD_ROLE_TO_ASSUME: ${{ secrets.NARWHAL_AWS_GOVCLOUD_ROLE_TO_ASSUME }} + SLACK_WEBHOOK_URL: ${{ secrets.NARWHAL_SLACK_URL }} diff --git a/.github/workflows/slash-command-dispatch.yml 
b/.github/workflows/slash-command-dispatch.yml new file mode 100644 index 0000000..b6eecf8 --- /dev/null +++ b/.github/workflows/slash-command-dispatch.yml @@ -0,0 +1,50 @@ +# When someone with write access to the repo adds a comment to a PR that contains "/test ", dispatch the workflow found in "test-command.yml" +# When someone with write access to the repo adds a comment to a PR that contains "/update ", dispatch the workflow found in "update-command.yml" + +name: Slash Command Dispatch + +on: + issue_comment: + types: [created] + +jobs: + + slashCommandDispatchTest: + if: github.event.issue.pull_request && contains(github.event.comment.body, '/test') + runs-on: ubuntu-latest + steps: + - name: Get token + id: get_workflow_token + uses: peter-murray/workflow-application-token-action@v2 + with: + application_id: ${{ secrets.NARWHAL_BOT_APP_ID }} + application_private_key: ${{ secrets.NARWHAL_BOT_SECRET }} + + - name: Slash Command Dispatch + uses: peter-evans/slash-command-dispatch@v3 + with: + token: ${{ steps.get_workflow_token.outputs.token }} + reaction-token: ${{ steps.get_workflow_token.outputs.token }} + commands: test + permission: write + issue-type: pull-request + + slashCommandDispatchUpdate: + if: github.event.issue.pull_request && contains(github.event.comment.body, '/update') + runs-on: ubuntu-latest + steps: + - name: Get token + id: get_workflow_token + uses: peter-murray/workflow-application-token-action@v2 + with: + application_id: ${{ secrets.NARWHAL_BOT_APP_ID }} + application_private_key: ${{ secrets.NARWHAL_BOT_SECRET }} + + - name: Slash Command Dispatch + uses: peter-evans/slash-command-dispatch@v3 + with: + token: ${{ steps.get_workflow_token.outputs.token }} + reaction-token: ${{ steps.get_workflow_token.outputs.token }} + commands: update + permission: write + issue-type: pull-request diff --git a/.github/workflows/test-command.yml b/.github/workflows/test-command.yml new file mode 100644 index 0000000..5a84d8f --- /dev/null +++ 
b/.github/workflows/test-command.yml @@ -0,0 +1,38 @@ +# usage: + # A user with write status to the repo can from a PR comment: + + # run a single test + # /test make= region= + + # run ping test + # /test ping + + # run all tests in the makefile + # /test + +name: test +on: + repository_dispatch: + types: [test-command] + + +permissions: + id-token: write + contents: read + +defaults: + run: + # We need -e -o pipefail for consistency with GitHub Actions' default behavior + shell: bash -e -o pipefail {0} + +jobs: + e2e-test: + uses: defenseunicorns/delivery-github-actions-workflows/.github/workflows/e2e-test.yml@main + secrets: + APPLICATION_ID: ${{ secrets.NARWHAL_BOT_APP_ID }} + APPLICATION_PRIVATE_KEY: ${{ secrets.NARWHAL_BOT_SECRET }} + AWS_COMMERCIAL_ROLE_TO_ASSUME: ${{ secrets.NARWHAL_AWS_COMMERCIAL_ROLE_TO_ASSUME }} + AWS_GOVCLOUD_ROLE_TO_ASSUME: ${{ secrets.NARWHAL_AWS_GOVCLOUD_ROLE_TO_ASSUME }} + with: + # check if the required slash command args are present, if so populate the json matrix, else pass in null and relevant e2e tests that would require a make target and region will be skipped + e2e-test-matrix: ${{ (contains(github.event.client_payload.slash_command_args.named, 'make') && contains(github.event.client_payload.slash_command_args.named, 'region')) && format('[{{"make-target":"{0}", "region":"{1}"}}]', github.event.client_payload.slash_command_args.named.make, github.event.client_payload.slash_command_args.named.region) || null }} diff --git a/.github/workflows/update-command.yml b/.github/workflows/update-command.yml new file mode 100644 index 0000000..da8b1a8 --- /dev/null +++ b/.github/workflows/update-command.yml @@ -0,0 +1,22 @@ +# This workflow is triggered by a comment on a pull request. The comment must contain "/update " to trigger the workflow. 
+ +name: update +on: + repository_dispatch: + types: [update-command] + +permissions: + id-token: write + contents: write + +defaults: + run: + # We need -e -o pipefail for consistency with GitHub Actions' default behavior + shell: bash -e -o pipefail {0} + +jobs: + update: + uses: defenseunicorns/delivery-github-actions-workflows/.github/workflows/update.yml@main + secrets: + APPLICATION_ID: ${{ secrets.NARWHAL_BOT_APP_ID }} + APPLICATION_PRIVATE_KEY: ${{ secrets.NARWHAL_BOT_SECRET }} diff --git a/.gitignore b/.gitignore index 9b8a46e..296cf43 100644 --- a/.gitignore +++ b/.gitignore @@ -1,21 +1,35 @@ -# Local .terraform directories -**/.terraform/* +.cache/ +.idea/ +.DS_Store +.vscode + +# Local .terraform directories +.terraform/ +*.terraform.* # .tfstate files *.tfstate *.tfstate.* +*.terraform.lock.hcl + # Crash log files crash.log crash.*.log # Exclude all .tfvars files, which are likely to contain sensitive data, such as -# password, private keys, and other secrets. These should not be part of version -# control as they are data points which are potentially sensitive and subject +# password, private keys, and other secrets. These should not be part of version +# control as they are data points which are potentially sensitive and subject # to change depending on the environment. 
*.tfvars *.tfvars.json +# Except ones that we do want to commit because they are used for automated tests +!examples/complete/fixtures.common.tfvars +!examples/complete/fixtures.insecure.tfvars +!examples/complete/fixtures.secure.tfvars +!modules/cloudtrail/examples/complete/fixtures.create-bucket.tfvars + # Ignore override files as they are usually used to override resources locally and so # are not checked in override.tf @@ -32,3 +46,18 @@ override.tf.json # Ignore CLI configuration files .terraformrc terraform.rc + +# Ignore Terragrunt cache +.terragrunt-cache* + +# Ignore Terraform backend configuration files +backend.tf + +# Ignore Checkov external module downloads +.external_modules + +examples/zarf-complete-example/build + +# Ignore the lambda builds JSON file created when deploying a lambda resource + +**/ignore diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 0000000..2a912a6 --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,42 @@ +run: + timeout: 5m +linters: + enable-all: true + disable: + - depguard + - exhaustivestruct + - exhaustruct + - gci + - goerr113 + - gofumpt + - goimports + - gomnd + - lll + - nlreturn + - stylecheck + # - testpackage + - varnamelen + # - wrapcheck + - wsl +linters-settings: + funlen: + lines: 120 + testifylint: + enable-all: false + enable: + - bool-compare + - compares + - empty + - error-is-as + - error-nil + - expected-actual + - float-compare + - len + - suite-dont-use-pkg + - suite-extra-assert-call + - suite-thelper + # -require-error causes errors in our e2e test patterns +issues: + exclude: + - "G304" # Potential file inclusion via variable + exclude-use-default: false diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..ffd9cda --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,52 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: check-added-large-files + args: ["--maxkb=1024"] + - id: check-merge-conflict + - id: 
detect-aws-credentials + args: + - "--allow-missing-credentials" + - id: detect-private-key + - id: end-of-file-fixer + - id: fix-byte-order-marker + - id: trailing-whitespace + args: [--markdown-linebreak-ext=md] + - id: check-yaml + args: + - "--allow-multiple-documents" + - repo: https://github.com/sirosen/texthooks + rev: 0.6.3 + hooks: + - id: fix-smartquotes + - repo: https://github.com/tekwizely/pre-commit-golang + rev: v1.0.0-rc.1 + hooks: + - id: go-fmt + - id: golangci-lint + args: + - "--timeout=10m" + - "--verbose" + - "--allow-parallel-runners" + - repo: https://github.com/antonbabenko/pre-commit-terraform + rev: v1.84.0 + hooks: + - id: terraform_fmt + - id: terraform_docs + args: + - --args=--lockfile=false + - --hook-config=--path-to-file=README.md # Valid UNIX path. I.e. ../TFDOC.md or docs/README.md etc. + - --hook-config=--add-to-existing-file=true # Boolean. true or false + - --hook-config=--create-file-if-not-exist=true # Boolean. true or false + - id: terraform_checkov + verbose: true + args: + - --args=--config-file __GIT_WORKING_DIR__/.checkov.yml + - id: terraform_tflint + args: + - --args=--config=__GIT_WORKING_DIR__/.tflint.hcl + - repo: https://github.com/renovatebot/pre-commit-hooks + rev: 37.91.4 + hooks: + - id: renovate-config-validator diff --git a/.release-please-manifest.json b/.release-please-manifest.json new file mode 100644 index 0000000..e18ee07 --- /dev/null +++ b/.release-please-manifest.json @@ -0,0 +1,3 @@ +{ + ".": "0.0.0" +} diff --git a/.tflint.hcl b/.tflint.hcl new file mode 100644 index 0000000..7b5e054 --- /dev/null +++ b/.tflint.hcl @@ -0,0 +1,10 @@ +plugin "terraform" { + enabled = true + preset = "recommended" +} + +#plugin "aws" { +# enabled = true +# version = "0.23.0" +# source = "github.com/terraform-linters/tflint-ruleset-aws" +#} diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 0000000..23c476f --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1,5 @@ +* @defenseunicorns/delivery-aws-iac + +# Privileged 
Files +/CODEOWNERS @defenseunicorns/delivery-aws-iac-admin +/LICENSE @defenseunicorns/delivery-aws-iac-admin diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..d4ba739 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,62 @@ +# Contributor Guide + +Thanks so much for wanting to help out! :tada: + +Most of what you'll see in this document is our attempt at documenting the lightweight development process that works for our team. We're always open to feedback and suggestions for improvement. The intention is not to force people to follow this process step by step, but rather to document it as a norm and provide a baseline for discussion. + +## Developer Experience + +Continuous Delivery is core to our development philosophy. Check out [https://minimumcd.org](https://minimumcd.org/) for a good baseline agreement on what that means. + +Specifically: + +- We do trunk-based development (`main`) with short-lived feature branches that originate from the trunk, get merged to the trunk, and are deleted after the merge. +- We don't merge work into `main` that isn't releasable. +- We perform automated testing on all pushes to `main`. Fixing failing pipelines in `main` is prioritized over all other work. +- We create immutable release artifacts. + +### Developer Workflow + +:key: == Required by automation + +1. Pick an issue to work on, assign it to yourself, and drop a comment in the issue to let everyone know you're working on it. +2. Create a Draft Pull Request targeting the `main` branch as soon as you are able to, even if it is just 5 minutes after you started working on it. We lean towards working in the open as much as we can. If you're not sure what to put in the PR description, just put a link to the issue you're working on. If you're not sure what to put in the PR title, just put "WIP" (Work In Progress) and we'll help you out with the rest. +3. :key: The automated tests have to pass for the PR to be able to be merged. 
To run the tests in the PR, add a comment to the PR that says `/test`. **NOTE** tests still have to pass in the merge queue, **you do not need to have tests pass in the PR, status checks are automatically reported as success in the PR**. If you want to run a specific test manually in the PR, you can use `/test make=<make-target> region=<region>`. The available CI tests are found in the [Makefile](./Makefile) and start with the string "test-ci-". +4. If your PR is still set as a Draft, transition it to "Ready for Review". +5. Get it reviewed by a [CODEOWNER](./CODEOWNERS) +6. Add the PR to the merge queue +7. The merge queue will run different tests based on whether it's a `release-please` pull request or just a regular pull request. If it's a `release-please` pull request, it will run all make targets starting with `test-ci-` and `test-release-` by default. If it's a regular pull request, it will run all make targets starting with `test-ci-` by default. If the tests fail, the PR will be removed from the merge queue and the PR stays open. If the tests pass, the PR will be merged to `main` and the PR will be closed. +8. If the issue is fully resolved, close it. _Hint: You can add "Closes #XXX" to the PR description to automatically close the issue when the PR is merged._ + +### Pre-Commit Hooks + +This project uses [pre-commit](https://pre-commit.com/) to run a set of checks on your code before you commit it. You have the option to either install pre-commit and all other needed tools locally or use our docker-based build harness. To use the build harness, run + +```shell +make pre-commit-all +``` +> NOTE: Sometimes file ownership of stuff in the `.cache` folder can get messed up. You can optionally add the `fix-cache-permissions` target to the above command to fix that. It is idempotent so it is safe to run it every time. 
+ +### Commit Messages + +Because we use the [release-please](https://github.com/googleapis/release-please) bot, commit messages to main must follow the [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) specification. This is enforced by the [commitlint](https://commitlint.js.org/#/) tool. This requirement is only enforced on the `main` branch. Commit messages in PRs can be whatever you want them to be. "Squash" mode must be used when merging a PR, with a commit message that follows the Conventional Commits specification. + +### Release Process + +This repo uses the [release-please](https://github.com/googleapis/release-please) bot. Release-please will automatically open a PR to update the version of the repo when a commit is merged to `main` that follows the [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) specification. The bot will automatically keep the PR up to date until a human merges it. When that happens the bot will automatically create a new release. + +### Backlog Management + +- We use [GitHub Issues](https://github.com/defenseunicorns/delivery-aws-iac/issues) to manage our backlog. +- Issues need to meet our Definition of Ready (see below). If it does not meet the Definition of Ready, we may close it and ask the requester to re-open it once it does. + +#### Definition of Ready for a Backlog Item + +To meet the Definition of Ready the issue needs to answer the following questions: +- Who is requesting it? +- What is being requested? +- Why is it needed? +- What is the impact? What will happen if the request is not fulfilled? +- How do we know that we are done? + +This can take various forms, and we don't care which form the issue takes as long as it answers the questions above. 
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..64524a7 --- /dev/null +++ b/Makefile @@ -0,0 +1,138 @@ +include .env + +.DEFAULT_GOAL := help + +# Optionally add the "-it" flag for docker run commands if the env var "CI" is not set (meaning we are on a local machine and not in github actions) +TTY_ARG := +ifndef CI + TTY_ARG := -it +endif + +# Silent mode by default. Run `make VERBOSE=1` to turn off silent mode. +ifndef VERBOSE +.SILENT: +endif + +# Idiomatic way to force a target to always run, by having it depend on this dummy target +FORCE: + +.PHONY: help +help: ## Show a list of all targets + grep -E '^[a-zA-Z0-9_-]+:.*?## .*$$' $(MAKEFILE_LIST) \ + | sed -n 's/^\(.*\): \(.*\)##\(.*\)/\1:\3/p' \ + | column -t -s ":" + +.PHONY: _create-folders +_create-folders: + mkdir -p .cache/docker + mkdir -p .cache/pre-commit + mkdir -p .cache/go + mkdir -p .cache/go-build + mkdir -p .cache/tmp + mkdir -p .cache/.terraform.d/plugin-cache + mkdir -p .cache/.zarf-cache + +.PHONY: _test-all +_test-all: _create-folders + # import any TF_VAR_ environment variables into the docker container. + echo "Running automated tests. This will take several minutes. At times it does not log anything to the console. 
If you interrupt the test run you will need to log into AWS console and manually delete any orphaned infrastructure.";\ + TF_VARS=$$(env | grep '^TF_VAR_' | awk -F= '{printf "-e %s ", $$1}'); \ + docker run $(TTY_ARG) --rm \ + --cap-add=NET_ADMIN \ + --cap-add=NET_RAW \ + -v "${PWD}:/app" \ + -v "${PWD}/.cache/tmp:/tmp" \ + -v "${PWD}/.cache/go:/root/go" \ + -v "${PWD}/.cache/go-build:/root/.cache/go-build" \ + -v "${PWD}/.cache/.terraform.d/plugin-cache:/root/.terraform.d/plugin-cache" \ + -v "${PWD}/.cache/.zarf-cache:/root/.zarf-cache" \ + --workdir "/app" \ + -e TF_LOG_PATH \ + -e TF_LOG \ + -e GOPATH=/root/go \ + -e GOCACHE=/root/.cache/go-build \ + -e TF_PLUGIN_CACHE_MAY_BREAK_DEPENDENCY_LOCK_FILE=true \ + -e TF_PLUGIN_CACHE_DIR=/root/.terraform.d/plugin-cache \ + -e AWS_REGION \ + -e AWS_DEFAULT_REGION \ + -e AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY \ + -e AWS_SESSION_TOKEN \ + -e AWS_SECURITY_TOKEN \ + -e AWS_SESSION_EXPIRATION \ + -e SKIP_SETUP \ + -e SKIP_TEST \ + -e SKIP_TEARDOWN \ + $${TF_VARS} \ + ${BUILD_HARNESS_REPO}:${BUILD_HARNESS_VERSION} \ + bash -c 'git config --global --add safe.directory /app && cd examples/complete && terraform init -upgrade=true && cd ../../test/e2e && go test -count 1 -v $(EXTRA_TEST_ARGS) .' + +.PHONY: test +test: ## Run all automated tests. Requires access to an AWS account. Costs real money. + $(MAKE) _test-all EXTRA_TEST_ARGS="-timeout 3h" + +.PHONY: test-ci-complete +test-ci-complete: ## Run one test (TestExamplesCompleteCommon). Requires access to an AWS account. Costs real money. + $(eval export TF_VAR_region := $(or $(REGION),$(TF_VAR_region),us-east-2)) + $(MAKE) _test-all EXTRA_TEST_ARGS="-timeout 3h -run TestExamplesCompleteCommon" + +.PHONY: test-complete-plan-only +test-complete-plan-only: ## Run one test (TestExamplesCompletePlanOnly). Requires access to an AWS account. It will not cost money or create any resources since it is just running `terraform plan`. 
+ $(eval export TF_VAR_region := $(or $(REGION),$(TF_VAR_region),us-east-2)) + $(MAKE) _test-all EXTRA_TEST_ARGS="-timeout 2h -run TestExamplesCompletePlanOnly" + +.PHONY: docker-save-build-harness +docker-save-build-harness: _create-folders ## Pulls the build harness docker image and saves it to a tarball + docker pull ${BUILD_HARNESS_REPO}:${BUILD_HARNESS_VERSION} + docker save -o .cache/docker/build-harness.tar ${BUILD_HARNESS_REPO}:${BUILD_HARNESS_VERSION} + +.PHONY: docker-load-build-harness +docker-load-build-harness: ## Loads the saved build harness docker image + docker load -i .cache/docker/build-harness.tar + +.PHONY: _runhooks +_runhooks: _create-folders + docker run $(TTY_ARG) --rm \ + -v "${PWD}:/app" \ + -v "${PWD}/.cache/tmp:/tmp" \ + -v "${PWD}/.cache/go:/root/go" \ + -v "${PWD}/.cache/go-build:/root/.cache/go-build" \ + -v "${PWD}/.cache/.terraform.d/plugin-cache:/root/.terraform.d/plugin-cache" \ + -v "${PWD}/.cache/.zarf-cache:/root/.zarf-cache" \ + --workdir "/app" \ + -e GOPATH=/root/go \ + -e GOCACHE=/root/.cache/go-build \ + -e TF_PLUGIN_CACHE_MAY_BREAK_DEPENDENCY_LOCK_FILE=true \ + -e TF_PLUGIN_CACHE_DIR=/root/.terraform.d/plugin-cache \ + -e "SKIP=$(SKIP)" \ + -e "PRE_COMMIT_HOME=/app/.cache/pre-commit" \ + ${BUILD_HARNESS_REPO}:${BUILD_HARNESS_VERSION} \ + bash -c 'git config --global --add safe.directory /app && pre-commit run -a --show-diff-on-failure $(HOOK)' + +.PHONY: pre-commit-all +pre-commit-all: ## Run all pre-commit hooks. Returns nonzero exit code if any hooks fail. Uses Docker for maximum compatibility + $(MAKE) _runhooks HOOK="" SKIP="" + +.PHONY: pre-commit-terraform +pre-commit-terraform: ## Run the terraform pre-commit hooks. Returns nonzero exit code if any hooks fail. 
Uses Docker for maximum compatibility + $(MAKE) _runhooks HOOK="" SKIP="check-added-large-files,check-merge-conflict,detect-aws-credentials,detect-private-key,end-of-file-fixer,fix-byte-order-marker,trailing-whitespace,check-yaml,fix-smartquotes,go-fmt,golangci-lint,renovate-config-validator" + +.PHONY: pre-commit-golang +pre-commit-golang: ## Run the golang pre-commit hooks. Returns nonzero exit code if any hooks fail. Uses Docker for maximum compatibility + $(MAKE) _runhooks HOOK="" SKIP="check-added-large-files,check-merge-conflict,detect-aws-credentials,detect-private-key,end-of-file-fixer,fix-byte-order-marker,trailing-whitespace,check-yaml,fix-smartquotes,terraform_fmt,terraform_docs,terraform_checkov,terraform_tflint,renovate-config-validator" + +.PHONY: pre-commit-renovate +pre-commit-renovate: ## Run the renovate pre-commit hooks. Returns nonzero exit code if any hooks fail. Uses Docker for maximum compatibility + $(MAKE) _runhooks HOOK="renovate-config-validator" SKIP="" + +.PHONY: pre-commit-common +pre-commit-common: ## Run the common pre-commit hooks. Returns nonzero exit code if any hooks fail. Uses Docker for maximum compatibility + $(MAKE) _runhooks HOOK="" SKIP="go-fmt,golangci-lint,terraform_fmt,terraform_docs,terraform_checkov,terraform_tflint,renovate-config-validator" + +.PHONY: fix-cache-permissions +fix-cache-permissions: ## Fixes the permissions on the pre-commit cache + docker run $(TTY_ARG) --rm -v "${PWD}:/app" --workdir "/app" -e "PRE_COMMIT_HOME=/app/.cache/pre-commit" ${BUILD_HARNESS_REPO}:${BUILD_HARNESS_VERSION} chmod -R a+rx .cache + +.PHONY: autoformat +autoformat: ## Update files with automatic formatting tools. Uses Docker for maximum compatibility. 
+ $(MAKE) _runhooks HOOK="" SKIP="check-added-large-files,check-merge-conflict,detect-aws-credentials,detect-private-key,check-yaml,golangci-lint,terraform_checkov,terraform_tflint,renovate-config-validator" diff --git a/README.md b/README.md index 8bdf8df..6c57bc0 100644 --- a/README.md +++ b/README.md @@ -1 +1 @@ -# aws-narwhal-iac-swf-reference-deployment \ No newline at end of file +# aws-narwhal-iac-swf-reference-deployment diff --git a/main.tf b/main.tf new file mode 100644 index 0000000..e69de29 diff --git a/release-please-config.json b/release-please-config.json new file mode 100644 index 0000000..c8e0d13 --- /dev/null +++ b/release-please-config.json @@ -0,0 +1,36 @@ +{ + "packages": { + ".": { + "bump-minor-pre-major": true, + "bump-patch-for-minor-pre-major": true, + "changelog-host": "https://github.com", + "changelog-path": "CHANGELOG.md", + "changelog-sections": [ + { "type": "feat", "section": "Features" }, + { "type": "feature", "section": "Features" }, + { "type": "fix", "section": "Bug Fixes" }, + { "type": "perf", "section": "Performance Improvements" }, + { "type": "revert", "section": "Reverts" }, + { "type": "docs", "section": "Documentation" }, + { "type": "style", "section": "Styles" }, + { "type": "chore", "section": "Miscellaneous Chores" }, + { "type": "refactor", "section": "Code Refactoring" }, + { "type": "test", "section": "Tests" }, + { "type": "build", "section": "Build System" }, + { "type": "ci", "section": "Continuous Integration" } + ], + "changelog-type": "default", + "draft": false, + "draft-pull-request": false, + "include-component-in-tag": false, + "include-v-in-tag": true, + "prerelease": false, + "pull-request-header": ":robot: I have created a release *beep* *boop*", + "pull-request-title-pattern": "chore${scope}: release${component} ${version}", + "release-type": "simple", + "separate-pull-requests": false, + "skip-github-release": false, + "versioning": "default" + } + } +} diff --git a/renovate.json5 b/renovate.json5 
new file mode 100644 index 0000000..722b8b7 --- /dev/null +++ b/renovate.json5 @@ -0,0 +1,69 @@ +{ + "$schema": "https://docs.renovatebot.com/renovate-schema.json", + "extends": [ + // Tells Renovate to maintain one GitHub issue as the "dependency dashboard". See https://docs.renovatebot.com/key-concepts/dashboard + ":dependencyDashboard", + // Use semantic commit type fix for dependencies and chore for all others if semantic commits are in use. See https://docs.renovatebot.com/presets-default/#semanticprefixfixdepschoreothers + ":semanticPrefixFixDepsChoreOthers", + // Group all updates together. See https://docs.renovatebot.com/presets-group/#groupall + // Other less drastic groupings that may be of interest include: group:allNonMajor, group:recommended, group:monorepos + "group:all", + // Apply crowd-sourced package replacement rules. See https://docs.renovatebot.com/presets-replacements/#replacementsall + "replacements:all", + // Apply crowd-sourced workarounds for known problems with packages. See https://docs.renovatebot.com/presets-workarounds/#workaroundsall + "workarounds:all" + ], + // If we don't specify a timezone then Renovate will use UTC + "timezone": "America/New_York", + // Runs between 4 am and 8 am Eastern time (America/New_York) on Mondays + "schedule": [ + "after 4am and before 8am on Monday" + ], + // This will prevent Renovate from automatically rebasing PRs. Without this, Renovate will rebase PRs whenever it wants to. The 'schedule' param is only for creating PRs. Because we are grouping all changes into one PR, without this Renovate would be constantly rebasing that PR, which we don't want since every time that happens another set of GHA status checks is kicked off. + // Using a value of "never" means that Renovate will not rebase or update PR branches on its own; a rebase only happens when it is requested manually. See https://docs.renovatebot.com/configuration-options/#rebasewhen + "rebaseWhen": "never", + // Labels to set in Pull Request. 
See https://docs.renovatebot.com/configuration-options/#labels + "labels": [ + "renovate" + ], + // Rate limit PRs to maximum x created per hour. 0 means no limit. See https://docs.renovatebot.com/configuration-options/#prhourlylimit + "prHourlyLimit": 1, + // Limit to a maximum of x concurrent branches/PRs. 0 means no limit. See https://docs.renovatebot.com/configuration-options/#prconcurrentlimit + "prConcurrentLimit": 0, + // Enable updates to the pre-commit-config.yaml file. See https://docs.renovatebot.com/modules/manager/pre-commit/ + "pre-commit": { + "enabled": true + }, + "regexManagers": [ + // Custom regex manager for the .env file that follows the pattern documented here: https://docs.renovatebot.com/modules/manager/regex/#advanced-capture + { + "fileMatch": ["^.env"], + "matchStrings": [ + "datasource=(?<datasource>.*?) depName=(?<depName>.*?)( versioning=(?<versioning>.*?))?\\s.*?_VERSION=(?<currentValue>.*)\\s" + ], + "versioningTemplate": "{{#if versioning}}{{{versioning}}}{{else}}semver-coerced{{/if}}", + "extractVersionTemplate": "^v?(?<version>.*)$" + }, + // Custom regex manager for the .tool-versions file that follows the pattern documented here: https://docs.renovatebot.com/modules/manager/regex/#advanced-capture + { + "fileMatch": ["^.tool-versions$"], + "matchStrings": [ + "datasource=(?<datasource>.*?) depName=(?<depName>.*?)( versioning=(?<versioning>.*?))?\\s.*?
(?<currentValue>.*)\\s" + ], + "versioningTemplate": "{{#if versioning}}{{{versioning}}}{{else}}semver-coerced{{/if}}", + "extractVersionTemplate": "^v?(?<version>.*)$" + } + ], + "packageRules": [ + { + "matchPackageNames": ["k8s.io/client-go"], + "allowedVersions": "<1.0.0" + }, + { + "matchManagers": ["terraform"], + "matchDepTypes": ["module"], + "matchDatasources": ["github-tags", "git-tags"], + "versioning": "loose" + } + ] +} diff --git a/test/e2e/examples_complete_insecure_test.go b/test/e2e/examples_complete_insecure_test.go new file mode 100644 index 0000000..1afdf93 --- /dev/null +++ b/test/e2e/examples_complete_insecure_test.go @@ -0,0 +1,50 @@ +package e2e_test + +import ( + "testing" + "time" + + "github.com/gruntwork-io/terratest/modules/terraform" + teststructure "github.com/gruntwork-io/terratest/modules/test-structure" + + "github.com/defenseunicorns/delivery_aws_iac_utils/pkg/utils" +) + +func TestExamplesCompleteInsecure(t *testing.T) { + t.Parallel() + tempFolder := teststructure.CopyTerraformFolderToTemp(t, "../..", "examples/complete") + terraformOptions := &terraform.Options{ + TerraformDir: tempFolder, + Upgrade: false, + VarFiles: []string{ + "fixtures.common.tfvars", + "fixtures.insecure.tfvars", + }, + RetryableTerraformErrors: map[string]string{ + ".*": "Failed to apply Terraform configuration due to an error.", + }, + MaxRetries: 5, + TimeBetweenRetries: 5 * time.Second, + } + + // Defer the teardown + defer func() { + t.Helper() + teststructure.RunTestStage(t, "TEARDOWN", func() { + terraform.Destroy(t, terraformOptions) + }) + }() + + // Set up the infra + teststructure.RunTestStage(t, "SETUP", func() { + terraform.InitAndApply(t, terraformOptions) + }) + + // Run assertions + teststructure.RunTestStage(t, "TEST", func() { + utils.ValidateEFSFunctionality(t, tempFolder) + utils.DownloadZarfInitPackage(t) + utils.ConfigureKubeconfig(t, tempFolder) + utils.ValidateZarfInit(t, tempFolder) + }) +} diff --git a/test/e2e/examples_complete_plan_only_test.go
b/test/e2e/examples_complete_plan_only_test.go new file mode 100644 index 0000000..03642b8 --- /dev/null +++ b/test/e2e/examples_complete_plan_only_test.go @@ -0,0 +1,30 @@ +package e2e_test + +import ( + "testing" + + "github.com/gruntwork-io/terratest/modules/terraform" + teststructure "github.com/gruntwork-io/terratest/modules/test-structure" +) + +func TestExamplesCompletePlanOnly(t *testing.T) { + t.Parallel() + tempFolder := teststructure.CopyTerraformFolderToTemp(t, "../..", "examples/complete") + terraformOptionsPlan := &terraform.Options{ + TerraformDir: tempFolder, + Upgrade: false, + VarFiles: []string{ + "fixtures.common.tfvars", + "fixtures.insecure.tfvars", + }, + // Set any overrides for variables you would like to validate + Vars: map[string]interface{}{ + "keycloak_enabled": false, + }, + SetVarsAfterVarFiles: true, + } + teststructure.RunTestStage(t, "SETUP", func() { + terraform.Init(t, terraformOptionsPlan) + terraform.Plan(t, terraformOptionsPlan) + }) +} diff --git a/test/e2e/examples_complete_secure_test.go b/test/e2e/examples_complete_secure_test.go new file mode 100644 index 0000000..57a7c72 --- /dev/null +++ b/test/e2e/examples_complete_secure_test.go @@ -0,0 +1,125 @@ +package e2e_test + +import ( + "os/exec" + "testing" + "time" + + "github.com/gruntwork-io/terratest/modules/logger" + "github.com/gruntwork-io/terratest/modules/terraform" + teststructure "github.com/gruntwork-io/terratest/modules/test-structure" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/defenseunicorns/delivery_aws_iac_utils/pkg/utils" +) + +// This test deploys the complete example in govcloud, "secure mode". Secure mode is: +// - Self-managed nodegroups only +// - Dedicated instance tenancy +// - EKS public endpoint disabled +// Sequence of events: +// 1. Deploy the VPC and Bastion. +// 2. With Sshuttle tunneling to the bastion, deploy the rest of the example. +// 3. 
With Sshuttle tunneling to the bastion, destroy EKS cluster. +// 4. Destroy the rest of the example. +func TestExamplesCompleteSecure(t *testing.T) { + t.Parallel() + // Setup options + tempFolder := teststructure.CopyTerraformFolderToTemp(t, "../..", "examples/complete") + terraformInitOptions := &terraform.Options{ + TerraformDir: tempFolder, + Upgrade: false, + } + terraformOptionsNoTargets := &terraform.Options{ + TerraformDir: tempFolder, + VarFiles: []string{ + "fixtures.common.tfvars", + "fixtures.secure.tfvars", + }, + RetryableTerraformErrors: map[string]string{ + ".*": "Failed to apply Terraform configuration due to an error.", + }, + MaxRetries: 5, + TimeBetweenRetries: 5 * time.Second, + } + terraformOptionsWithVPCAndBastionTargets := &terraform.Options{ + TerraformDir: tempFolder, + VarFiles: []string{ + "fixtures.common.tfvars", + "fixtures.secure.tfvars", + }, + Targets: []string{ + "module.vpc", + "module.bastion", + }, + RetryableTerraformErrors: map[string]string{ + ".*": "Failed to apply Terraform configuration due to an error.", + }, + MaxRetries: 5, + TimeBetweenRetries: 5 * time.Second, + } + terraformOptionsWithEKSTarget := &terraform.Options{ + TerraformDir: tempFolder, + VarFiles: []string{ + "fixtures.common.tfvars", + "fixtures.secure.tfvars", + }, + Targets: []string{ + "module.eks", + }, + RetryableTerraformErrors: map[string]string{ + ".*": "Failed to apply Terraform configuration due to an error.", + }, + MaxRetries: 5, + TimeBetweenRetries: 5 * time.Second, + } + + // Defer the teardown + defer func() { + t.Helper() + teststructure.RunTestStage(t, "TEARDOWN", func() { + terraformOutputOptions := &terraform.Options{ + TerraformDir: tempFolder, + Logger: logger.Discard, + } + _, outputBastionInstanceIDErr := terraform.OutputE(t, terraformOutputOptions, "bastion_instance_id") + // We are intentionally using `assert` here and not `require`. We want the rest of this function to run even if there are errors. 
+ assert.NoError(t, outputBastionInstanceIDErr) + _, outputVpcCidrErr := terraform.OutputE(t, terraformOutputOptions, "vpc_cidr") + assert.NoError(t, outputVpcCidrErr) + _, outputBastionRegionErr := terraform.OutputE(t, terraformOutputOptions, "bastion_region") + assert.NoError(t, outputBastionRegionErr) + if outputBastionInstanceIDErr == nil && outputVpcCidrErr == nil && outputBastionRegionErr == nil { + // We can only destroy using sshuttle if the bastion exists and is functional. If we get, for example, an error saying there is not enough capacity in the chosen AZ, the bastion will have never been deployed and this will fail because `terraform output` didn't return anything. + err := utils.DestroyWithSshuttle(t, terraformOptionsWithEKSTarget) + assert.NoError(t, err) + } + terraform.Destroy(t, terraformOptionsNoTargets) + }) + }() + + // Deploy the infra + teststructure.RunTestStage(t, "SETUP", func() { + terraform.Init(t, terraformInitOptions) + terraform.Apply(t, terraformOptionsWithVPCAndBastionTargets) + err := utils.ApplyWithSshuttle(t, terraformOptionsNoTargets) + require.NoError(t, err) + }) + + // Run assertions + teststructure.RunTestStage(t, "TEST", func() { + // Start sshuttle + cmd, err := utils.RunSshuttleInBackground(t, tempFolder) + require.NoError(t, err) + defer func(t *testing.T, cmd *exec.Cmd) { + t.Helper() + err := utils.StopSshuttle(t, cmd) + require.NoError(t, err) + }(t, cmd) + utils.ValidateEFSFunctionality(t, tempFolder) + utils.DownloadZarfInitPackage(t) + utils.ConfigureKubeconfig(t, tempFolder) + utils.ValidateZarfInit(t, tempFolder) + }) +} diff --git a/test/e2e/main_test.go b/test/e2e/main_test.go new file mode 100644 index 0000000..054e068 --- /dev/null +++ b/test/e2e/main_test.go @@ -0,0 +1,29 @@ +package e2e_test + +import ( + "context" + "os" + "testing" + "time" + + "github.com/defenseunicorns/delivery_aws_iac_utils/pkg/utils" +) + +// TestMain is the entry point for all tests. 
We are using a custom one so that we can log a message to the console every few minutes. Without this there is a risk of GitHub Actions killing the test run if it believes it is hung. +func TestMain(m *testing.M) { + ctx, cancel := context.WithCancel(context.Background()) + go func() { + for { + select { + case <-ctx.Done(): + return + default: + utils.DoLog("The test is still running! Don't kill me!") + } + time.Sleep(180 * time.Second) + } + }() + exitVal := m.Run() + cancel() + os.Exit(exitVal) +} diff --git a/test/e2e/utils/utils.go b/test/e2e/utils/utils.go new file mode 100644 index 0000000..8d65c47 --- /dev/null +++ b/test/e2e/utils/utils.go @@ -0,0 +1,290 @@ +// Package utils is a package that contains utility functions for the e2e tests. +package utils + +import ( + "context" + "encoding/base64" + "fmt" + "os/exec" + "testing" + "time" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/eks" + "github.com/gruntwork-io/terratest/modules/logger" + "github.com/gruntwork-io/terratest/modules/terraform" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + batchv1 "k8s.io/api/batch/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "sigs.k8s.io/aws-iam-authenticator/pkg/token" +) + +// TODO: Figure out how to parse the input variables to get the bastion password rather than having to hardcode it. +// +//nolint:godox +const bastionPassword = "my-password" + +// DoLog logs the given arguments to standard output, prefixed with an RFC 3339 timestamp. +func DoLog(args ...interface{}) { + date := time.Now() + prefix := fmt.Sprintf("%s:", date.Format(time.RFC3339)) + allArgs := append([]interface{}{prefix}, args...) + fmt.Println(allArgs...) //nolint:forbidigo +} + +// GetEKSCluster returns the EKS cluster for the given terraform folder.
+func GetEKSCluster(t *testing.T, tempFolder string) (*eks.Cluster, error) { + t.Helper() + terraformOutputOptions := &terraform.Options{ + TerraformDir: tempFolder, + Logger: logger.Discard, + } + // Get outputs + bastionRegion := terraform.Output(t, terraformOutputOptions, "bastion_region") + clusterName := terraform.Output(t, terraformOutputOptions, "eks_cluster_name") + // Create the EKS clientset + sess := session.Must(session.NewSession(&aws.Config{ + Region: aws.String(bastionRegion), + })) + eksSvc := eks.New(sess) + input := &eks.DescribeClusterInput{Name: aws.String(clusterName)} + result, err := eksSvc.DescribeCluster(input) + if err != nil { + return nil, fmt.Errorf("failed to describe cluster: %w", err) + } + return result.Cluster, nil +} + +// NewK8sClientset returns a new kubernetes clientset for the given cluster. +func NewK8sClientset(cluster *eks.Cluster) (*kubernetes.Clientset, error) { + gen, err := token.NewGenerator(true, false) + if err != nil { + return nil, fmt.Errorf("failed to create token generator: %w", err) + } + opts := &token.GetTokenOptions{ + ClusterID: aws.StringValue(cluster.Name), + } + tok, err := gen.GetWithOptions(opts) + if err != nil { + return nil, fmt.Errorf("failed to create token: %w", err) + } + ca, err := base64.StdEncoding.DecodeString(aws.StringValue(cluster.CertificateAuthority.Data)) + if err != nil { + return nil, fmt.Errorf("failed to decode string: %w", err) + } + clientset, err := kubernetes.NewForConfig( + &rest.Config{ + Host: aws.StringValue(cluster.Endpoint), + BearerToken: tok.Token, + TLSClientConfig: rest.TLSClientConfig{ + CAData: ca, + }, + }, + ) + if err != nil { + return nil, fmt.Errorf("failed to create clientset: %w", err) + } + return clientset, nil +} + +// ApplyWithSshuttle runs terraform apply with sshuttle running in the background. 
+func ApplyWithSshuttle(t *testing.T, terraformOptions *terraform.Options) error { + t.Helper() + cmd, err := RunSshuttleInBackground(t, terraformOptions.TerraformDir) + if err != nil { + return err + } + defer func(t *testing.T, cmd *exec.Cmd) { + t.Helper() + err := StopSshuttle(t, cmd) + require.NoError(t, err) + }(t, cmd) + terraform.Apply(t, terraformOptions) + return nil +} + +// DestroyWithSshuttle runs terraform destroy with sshuttle running in the background. +func DestroyWithSshuttle(t *testing.T, terraformOptions *terraform.Options) error { + t.Helper() + cmd, err := RunSshuttleInBackground(t, terraformOptions.TerraformDir) + if err != nil { + return err + } + defer func(t *testing.T, cmd *exec.Cmd) { + t.Helper() + err := StopSshuttle(t, cmd) + require.NoError(t, err) + }(t, cmd) + terraform.Destroy(t, terraformOptions) + return nil +} + +// RunSshuttleInBackground runs sshuttle in the background. +func RunSshuttleInBackground(t *testing.T, tempFolder string) (*exec.Cmd, error) { + t.Helper() + terraformOutputOptions := &terraform.Options{ + TerraformDir: tempFolder, + Logger: logger.Discard, + } + bastionInstanceID := terraform.Output(t, terraformOutputOptions, "bastion_instance_id") + bastionPrivateDNS := terraform.Output(t, terraformOutputOptions, "bastion_private_dns") + vpcCidr := terraform.Output(t, terraformOutputOptions, "vpc_cidr") + bastionRegion := terraform.Output(t, terraformOutputOptions, "bastion_region") + // Check that SShuttle is actually working by querying the bastion's private DNS, which will only work if sshuttle is working. + // If it works, it will return exit code 52 ("Empty reply from server"). Failure will most likely result in exit code 28 ("Couldn't connect to server"), but any result other than exit code 52 should be treated as a failure. + // We'll retry a few times in case the bastion is still starting up. 
+ retryAttempts := 25 + var sshuttleCmd *exec.Cmd + for i := 0; i < retryAttempts; i++ { + sshuttleCmd, err := startSshuttle(t, bastionInstanceID, bastionRegion, bastionPassword, vpcCidr) + if err != nil { + return nil, fmt.Errorf("failed to start sshuttle: %w", err) + } + + // It takes a few seconds for sshuttle to start up + time.Sleep(20 * time.Second) + + //nolint:gosec + curlCmd := exec.Command("curl", "-v", bastionPrivateDNS) + // We don't care about the output, just the exit code. Since we are looking for exit code 52, we should expect an error here. + err = curlCmd.Run() + if err != nil { + DoLog(err) + } + if curlCmd.ProcessState.ExitCode() == 52 { + // Success! sshuttle is working. + return sshuttleCmd, nil + } + // Failure. Try again. + DoLog(fmt.Sprintf("sshuttle failed to start up. Retrying... (attempt %d of %d)", i+1, retryAttempts)) + err = StopSshuttle(t, sshuttleCmd) + if err != nil { + DoLog(err) + } + } + // If we get here, we got through our for loop without verifying that sshuttle was working, so we should stop it and return an error. + err := StopSshuttle(t, sshuttleCmd) + if err != nil { + DoLog(err) + } + return nil, fmt.Errorf("failed to start sshuttle: could not verify that sshuttle was working") +} + +func startSshuttle(t *testing.T, bastionInstanceID string, bastionRegion string, bastionPassword string, vpcCidr string) (*exec.Cmd, error) { + t.Helper() + cmd := exec.Command("sshuttle", "-e", fmt.Sprintf(`sshpass -p "%s" ssh -q -o CheckHostIP=no -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ProxyCommand="aws ssm --region '%s' start-session --target %%h --document-name AWS-StartSSHSession --parameters 'portNumber=%%p'"`, bastionPassword, bastionRegion), "--dns", "--disable-ipv6", "-vr", fmt.Sprintf("ec2-user@%s", bastionInstanceID), vpcCidr) //nolint:gosec + if err := cmd.Start(); err != nil { + return nil, fmt.Errorf("failed to start sshuttle: %w", err) + } + return cmd, nil +} + +// StopSshuttle stops sshuttle. 
+func StopSshuttle(t *testing.T, cmd *exec.Cmd) error { + t.Helper() + if cmd == nil { + return fmt.Errorf("failed to stop sshuttle: cmd is nil") + } + if cmd.Process == nil { + return fmt.Errorf("failed to stop sshuttle: cmd.Process is nil") + } + if err := cmd.Process.Kill(); err != nil { + return fmt.Errorf("failed to stop sshuttle: %w", err) + } + return nil +} + +// ValidateEFSFunctionality idempotently validates that EFS functionality is working. +func ValidateEFSFunctionality(t *testing.T, tempFolder string) { + t.Helper() + terraformOutputOptions := &terraform.Options{ + TerraformDir: tempFolder, + Logger: logger.Discard, + } + // Validate that var.enable_efs was set to true, otherwise this will always fail. We'll do that by checking for the presence of the output "efs_storageclass_name". + efsStorageClassName := terraform.Output(t, terraformOutputOptions, "efs_storageclass_name") + require.NotNil(t, efsStorageClassName) + require.NotEmpty(t, efsStorageClassName) + + // Get the cluster + cluster, err := GetEKSCluster(t, tempFolder) + require.NoError(t, err) + clientset, err := NewK8sClientset(cluster) + require.NoError(t, err) + // Wait for the job "test-write" in the namespace "default" to complete, with a 2-minute timeout + namespace := "default" + jobName := "test-write" + timeout := 2 * time.Minute + + // Create a context with a timeout + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + // Use PollUntilContextCancel + err = wait.PollUntilContextCancel(ctx, time.Second, true, func(ctx context.Context) (bool, error) { + job, err := clientset.BatchV1().Jobs(namespace).Get(ctx, jobName, metav1.GetOptions{}) + if err != nil { + return false, fmt.Errorf("failed to get kubernetes jobs: %w", err) + } + for _, c := range job.Status.Conditions { + if c.Type == batchv1.JobComplete && c.Status == "True" { + return true, nil + } else if c.Type == batchv1.JobFailed && c.Status == "True" { + return false, fmt.Errorf("job failed") 
+ } + } + return false, nil + }) + + if err != nil { + DoLog("Job did not complete in time: %v\n", err) + } else { + DoLog("Job completed successfully") + } + assert.NoError(t, err) +} + +// DownloadZarfInitPackage idempotently downloads the Zarf init package if it doesn't already exist. +func DownloadZarfInitPackage(t *testing.T) { + t.Helper() + // Download the Zarf init package if it doesn't already exist + err := exec.Command("bash", "-c", `VERSION=$(zarf version); URL=https://github.com/defenseunicorns/zarf/releases/download/${VERSION}/zarf-init-amd64-${VERSION}.tar.zst; TARGET=~/.zarf-cache/zarf-init-amd64-${VERSION}.tar.zst; mkdir -p ~/.zarf-cache; [ -f $TARGET ] || curl -L $URL -o $TARGET`).Run() + require.NoError(t, err) +} + +// ConfigureKubeconfig idempotently uses the AWS CLI to configure the user's kubeconfig file with the new EKS cluster. +func ConfigureKubeconfig(t *testing.T, tempFolder string) { + t.Helper() + terraformOutputOptions := &terraform.Options{ + TerraformDir: tempFolder, + Logger: logger.Discard, + } + eksClusterName := terraform.Output(t, terraformOutputOptions, "eks_cluster_name") + region := terraform.Output(t, terraformOutputOptions, "bastion_region") + err := exec.Command("bash", "-c", fmt.Sprintf("mkdir -p ~/.kube && aws eks update-kubeconfig --name %s --alias %s --region %s", eksClusterName, eksClusterName, region)).Run() //nolint:gosec + require.NoError(t, err) + // Make sure it worked. This command should return without error + err = exec.Command("bash", "-c", "kubectl get nodes").Run() + require.NoError(t, err) +} + +// ValidateZarfInit idempotently ensures that zarf init runs successfully. 
+func ValidateZarfInit(t *testing.T, tempFolder string) { + t.Helper() + terraformOutputOptions := &terraform.Options{ + TerraformDir: tempFolder, + Logger: logger.Discard, + } + storageClassName := terraform.Output(t, terraformOutputOptions, "efs_storageclass_name") + outputBytes, err := exec.Command("bash", "-c", fmt.Sprintf("zarf init --components=logging,git-server --confirm --no-log-file --no-progress --storage-class %s", storageClassName)).CombinedOutput() //nolint:gosec + if err != nil { + DoLog("zarf init failed: %v\n", err) + DoLog("zarf init output: %s\n", string(outputBytes)) + } + require.NoError(t, err) +} diff --git a/zarf/packages/eks-addons.yaml b/zarf/packages/eks-addons.yaml new file mode 100644 index 0000000..e69de29 diff --git a/zarf/packages/eks.yaml b/zarf/packages/eks.yaml new file mode 100644 index 0000000..e69de29