Skip to content

Commit

Permalink
.github: workflow for data-plane-controller (WIP)
Browse files Browse the repository at this point in the history
  • Loading branch information
jgraettinger committed Nov 11, 2024
1 parent 2e21aca commit e452cbf
Show file tree
Hide file tree
Showing 7 changed files with 162 additions and 9 deletions.
63 changes: 63 additions & 0 deletions .github/workflows/data-plane-controller.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Builds the `data-plane-controller` binary and deploys it as a Cloud Run job.
name: Deploy data-plane-controller

on:
  workflow_dispatch: {}
  push:
    branches: [johnny/dpc-cd]

# Serialize deployments: overlapping runs (e.g. a push plus a manual dispatch)
# could otherwise finish out of order and leave an older build deployed.
concurrency:
  group: deploy-data-plane-controller
  cancel-in-progress: false

env:
  CARGO_INCREMENTAL: 0 # Faster from-scratch builds.

jobs:
  build:
    runs-on: ubuntu-24.04
    permissions:
      # Permissions required of the Github token in order for
      # federated identity and authorization to work.
      contents: read
      id-token: write
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: true
          lfs: true

      # NOTE(review): presumably provides the local database required at
      # build time (e.g. compile-time checked queries) -- confirm.
      - uses: supabase/setup-cli@v1
      - run: supabase start

      - name: Build `data-plane-controller`
        run: cargo build --release -p data-plane-controller

      # Place the binary next to the Dockerfile, which is the `source`
      # directory handed to the deploy step below.
      - run: mv target/release/data-plane-controller crates/data-plane-controller/

      - name: Authenticate with GCP Workload Identity Federation
        uses: google-github-actions/auth@v2
        with:
          # NOTE(review): this value appears redacted in this view; confirm the
          # real service account address before relying on it.
          service_account: [email protected]
          workload_identity_provider: projects/1084703453822/locations/global/workloadIdentityPools/github-actions/providers/github-actions-provider

      - name: Update data-plane-controller job
        uses: google-github-actions/deploy-cloudrun@v2
        with:
          job: data-plane-controller
          project_id: estuary-control
          region: us-central1
          source: crates/data-plane-controller/
          timeout: 2h # Self-cancels after 1 hour, with 1 hour grace period.

          # NOTE(review): the user/host portion of DPC_DATABASE_URL also appears
          # redacted in this view; confirm the real value.
          env_vars: |-
            DPC_DATABASE_CA=/etc/db-ca.crt
            DPC_DATABASE_URL=postgresql://[email protected]:5432/postgres
            NO_COLOR=1
          secrets: |-
            CONTROL_PLANE_DB_CA_CERT=CONTROL_PLANE_DB_CA_CERT:latest
            DPC_GITHUB_SSH_KEY=DPC_GITHUB_SSH_KEY:latest
            DPC_IAM_CREDENTIALS=DPC_IAM_CREDENTIALS:latest
            DPC_SERVICE_ACCOUNT=DPC_SERVICE_ACCOUNT:latest
            PGPASSWORD=POSTGRES_PASSWORD:latest
            VULTR_API_KEY=DPC_VULTR_API_KEY:latest
          env_vars_update_strategy: overwrite
          secrets_update_strategy: overwrite
4 changes: 2 additions & 2 deletions crates/automations/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ models = { path = "../models", features = ["sqlx-support"] }

anyhow = { workspace = true }
futures = { workspace = true }
rand = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
sqlx = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
tracing-subscriber = { workspace = true }

[dev-dependencies]
rand = { workspace = true }
tracing-subscriber = { workspace = true }
6 changes: 5 additions & 1 deletion crates/automations/src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,12 @@ async fn ready_tasks_iter(
// If permits remain, there were not enough tasks to dequeue.
// Sleep for approximately `dequeue_interval` (jittered by ±10%), cancelling early if a task completes.
if permits.num_permits() != 0 {
// Jitter dequeue by 10% in either direction, to ensure
// distribution of tasks and retries across executors.
let jitter = 0.9 + rand::random::<f64>() * 0.2; // [0.9, 1.1)

tokio::select! {
() = tokio::time::sleep(dequeue_interval) => (),
() = tokio::time::sleep(dequeue_interval.mul_f64(jitter)) => (),
_ = semaphore.clone().acquire_owned() => (), // Cancel sleep.
}
}
Expand Down
47 changes: 47 additions & 0 deletions crates/data-plane-controller/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Runtime image for `data-plane-controller`.
#
# The build context must contain a pre-built `data-plane-controller` binary
# alongside `entrypoint.sh`; both are copied into /usr/local/bin below.
FROM ubuntu:noble

# Install required packages.
# `apt-get` is used because `apt` does not offer a stable CLI for scripts, and
# the noninteractive frontend keeps package configuration from prompting.
RUN apt-get update -y \
 && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
      ca-certificates \
      certbot \
      curl \
      git \
      openssh-client \
      python3-certbot-dns-google \
      python3-poetry \
      python3-venv \
 && rm -rf /var/lib/apt/lists/*

# Install the `pulumi` CLI.
# The default shell has no `pipefail`, so a failed download would otherwise go
# unnoticed and leave a dangling symlink: run the installed binary to verify it.
RUN curl -fsSL https://get.pulumi.com/ | bash -s \
 && ln -s /root/.pulumi/bin/pulumi /usr/local/bin/pulumi \
 && pulumi version

# Install the `sops` CLI.
# `-f` fails the build on an HTTP error rather than saving the error page as the binary.
RUN curl -fsSL -o /usr/local/bin/sops https://github.com/getsops/sops/releases/download/v3.9.1/sops-v3.9.1.linux.amd64 \
 && chmod +x /usr/local/bin/sops

# Copy in our local assets.
COPY data-plane-controller /usr/local/bin/
COPY entrypoint.sh /usr/local/bin/

# AWS profile to expect in ~/.aws/credentials
ENV AWS_PROFILE=data-plane-ops
# GCP Service Account JSON credentials path.
ENV GOOGLE_APPLICATION_CREDENTIALS=/etc/data_plane_controller.json
# Disable host-key checks when cloning our git repo.
# NOTE(review): this accepts any host key and so permits man-in-the-middle of
# the git remote; consider shipping a pinned known_hosts entry instead.
ENV GIT_SSH_COMMAND="ssh -o StrictHostKeyChecking=no"

ENV RUST_LOG=info

CMD ["/usr/local/bin/entrypoint.sh"]

# Example of running this container locally:
# docker run --rm --net=host -it \
#   -e CONTROL_PLANE_DB_CA_CERT="$(</home/johnny/Downloads/prod-ca-2021.crt)" \
#   -e DPC_DATABASE_URL="${DATABASE_URL}" \
#   -e DPC_GITHUB_SSH_KEY="$(</home/johnny/data_plane_controller.key)" \
#   -e DPC_IAM_CREDENTIALS="$(</home/johnny/.aws/credentials)" \
#   -e DPC_SERVICE_ACCOUNT="$(</etc/data_plane_controller.json)" \
#   -e VULTR_API_KEY="${VULTR_API_KEY}" \
#   -e PGPASSWORD="${PGPASSWORD}" \
#   foobar:latest
40 changes: 40 additions & 0 deletions crates/data-plane-controller/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/bin/bash
#
# Container entrypoint for data-plane-controller.
#
# Writes secrets supplied through environment variables to the file locations
# the tooling expects, loads the SSH key into an ssh-agent, logs the egress IP,
# and then runs data-plane-controller. The controller is sent SIGINT after one
# hour, and this script exits with the controller's exit status.
#
# Required environment: CONTROL_PLANE_DB_CA_CERT, DPC_GITHUB_SSH_KEY,
# DPC_IAM_CREDENTIALS, DPC_SERVICE_ACCOUNT, GOOGLE_APPLICATION_CREDENTIALS.

set -o errexit
set -o pipefail
set -o nounset

# Create the secret files readable by their owner only, then restore the
# previous umask so the controller's own files are unaffected.
PREVIOUS_UMASK="$(umask)"
umask 077

# Place secrets into expected file locations.
# The SSH key in particular requires a trailing newline.
# `-p` keeps an already-existing directory from aborting the script under errexit.
mkdir -p /root/.aws
printf '%s\n' "${CONTROL_PLANE_DB_CA_CERT}" > /etc/db-ca.crt
printf '%s\n' "${DPC_GITHUB_SSH_KEY}" > /root/ssh_key
printf '%s\n' "${DPC_IAM_CREDENTIALS}" > /root/.aws/credentials
printf '%s\n' "${DPC_SERVICE_ACCOUNT}" > "${GOOGLE_APPLICATION_CREDENTIALS}"

umask "${PREVIOUS_UMASK}"

# Start background ssh-agent, evaluate output to set variables, and add SSH key.
chmod 0400 /root/ssh_key
eval "$(ssh-agent -s)"
ssh-add /root/ssh_key

# Log out the IP from which we're running.
# This is purely diagnostic: bound the request, and don't let an outage of the
# third-party lookup service prevent the controller from starting.
echo "Current egress IP:"
curl -s -S --max-time 10 http://icanhazip.com || echo "(unable to determine egress IP)"

# Start data-plane-controller in the background
data-plane-controller &
DPC_PID=$!

# Start a background timer to send SIGINT after one hour.
# The kill is best-effort, as the controller may already have exited.
(
  sleep 3600
  kill -INT "${DPC_PID}" 2>/dev/null || true
) &

# Wait for data-plane-controller to exit and surface its status.
# errexit is disabled so that a non-zero status is reported rather than
# aborting the script at `wait`.
set +o errexit
wait "${DPC_PID}"
DPC_STATUS=${?}

echo "data-plane-controller exited with status ${DPC_STATUS}"
exit "${DPC_STATUS}"
7 changes: 3 additions & 4 deletions crates/data-plane-controller/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,18 @@ pub struct Args {
env = "DPC_DATABASE_URL",
default_value = "postgres://postgres:[email protected]:5432/postgres"
)]
#[serde(skip_serializing)]
database_url: url::Url,
/// Path to CA certificate of the database.
#[clap(long = "database-ca", env = "DPC_DATABASE_CA")]
database_ca: Option<String>,
/// Number of tasks which may be polled concurrently.
#[clap(long = "concurrency", env = "DPC_CONCURRENCY", default_value = "2")]
#[clap(long = "concurrency", env = "DPC_CONCURRENCY", default_value = "1")]
concurrency: u32,
/// Interval between polls for dequeue-able tasks when otherwise idle.
#[clap(
long = "dequeue-interval",
env = "DPC_DEQUEUE_INTERVAL",
default_value = "5s"
default_value = "10s"
)]
#[serde(with = "humantime_serde")]
#[arg(value_parser = humantime::parse_duration)]
Expand Down Expand Up @@ -96,7 +95,7 @@ pub async fn run(args: Args) -> anyhow::Result<()> {
}

let pg_pool = sqlx::postgres::PgPoolOptions::new()
.acquire_timeout(std::time::Duration::from_secs(5))
.acquire_timeout(std::time::Duration::from_secs(30))
.connect_with(pg_options)
.await
.context("connecting to database")?;
Expand Down
4 changes: 2 additions & 2 deletions crates/data-plane-controller/src/stack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ pub struct DataPlane {
pub control_plane_api: url::Url,
pub data_buckets: Vec<url::Url>,
pub gcp_project: String,
pub ssh_subnets: Vec<ipnetwork::Ipv6Network>,
pub ssh_subnets: Vec<ipnetwork::IpNetwork>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub private_links: Vec<AWSPrivateLink>,
pub deployments: Vec<Deployment>,
Expand Down Expand Up @@ -86,7 +86,7 @@ pub struct AnsibleRole {

#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub struct AnsibleHost {
pub ansible_host: std::net::Ipv6Addr,
pub ansible_host: std::net::IpAddr,
pub ansible_user: String,
pub host_fqdn: String,
pub local_cert_pem: String,
Expand Down

0 comments on commit e452cbf

Please sign in to comment.