From 985fa54effaf66b9054e9f78be209820fe752358 Mon Sep 17 00:00:00 2001 From: Taylor Downs Date: Sat, 11 Nov 2023 21:44:27 +0000 Subject: [PATCH] migration code for v0.9.3 users, closes #1363 --- CHANGELOG.md | 53 +++++++++ lib/lightning/setup_utils.ex | 110 ++++++++++++++++++ ...20230904113118_trigger_jobs_into_nodes.exs | 44 +------ ...31005062844_add_attempt_id_to_log_line.exs | 7 +- 4 files changed, 174 insertions(+), 40 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a466709e1d..774d3dc83f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,8 +8,61 @@ and this project adheres to ## [Unreleased] +### 🚨 Breaking change warning! 🚨 + +This release will contain breaking changes as we've significantly improved both +the workflow building and execution systems. + +#### Nodes and edges + +Before, workflows were represented as a list of jobs and triggers. For greater +flexibility and control of complex workflows, we've moved towards a more robust +"nodes and edges" approach. Where jobs in a workflow (a node) can be connected +by edges. + +Triggers still exist, but live "outside" the directed acyclic graph (DAG) and +are used to automatically create workorders and attempts. + +We've provided migrations that bring `v0.9.3` workflows in line with the +`v0.10.0` requirements. + +#### Scalable workers + +Before, Lightning spawned child processes to execute attempts in sand-boxed +NodeVMs on the same server. This created inefficiencies and security +vulnerabilities. Now, the Lightning web server adds attempts to a queue and +multiple worker applications can pull from that queue to process work. + +In dev mode, this all happens automatically and on one machine, but in most +high-availability production environments the workers will be on another server. + +Attempts are now handled entirely by the workers, and they report back to +Lightning. Exit reasons, final attempt states, error types and error messages +are either entirely new or handled differently now, but we have provided +migration scripts that will work to bring _most_ `v0.9.3` runs, attempts, and +workorders up to `v0.10.0`, though the granularity of `v0.9.3` states and exits +will be less than `v0.10.0` and the final states are not guaranteed to be +accurate for workflows with multiple branches and leaf nodes with varying exit +reasons. + +The migration scripts can be run with a single function call in SetupUtils from +a connect `iex` session: + +``` +Lightning.SetupUtils.approximate_state_for_attempts_and_workorders() +``` + +Note that (like lots of _other_ functionality in `SetupUtils`, calling this +function is a destructive action and you should only do it if you've backed up +your data and you know what you're doing.) + +As always, we recommend backing up your data before migrating. (And thanks for +bearing with us as we move towards our first stable Lightning release.) + ### Added +- Migration helper code to move from `v0.9.3` to `v0.10.0` added to SetupUtils + [#1363](https://github.com/OpenFn/Lightning/issues/1363) - Option to start with `RTM=false iex -S mix phx.server` for opting out of the dev-mode automatic runtime manager. - Webhook Authentication Methods database and CRUD operations diff --git a/lib/lightning/setup_utils.ex b/lib/lightning/setup_utils.ex index aa3e987720..c1dd3bd580 100644 --- a/lib/lightning/setup_utils.ex +++ b/lib/lightning/setup_utils.ex @@ -985,4 +985,114 @@ defmodule Lightning.SetupUtils do dataclip end + + @doc """ + This is a helper function that can be used to aid instance administrators in + migrating from v0.9.3 to v0.10.0. + + While the final state of an attempt is now handled by the worker, this script + can be called once after migration (with no workers attached) to provide an + approximate final state for attempts that were finished before the migration. + """ + def approximate_state_for_attempts_and_workorders do + # Set attempts state by exit_reason of their last run. + Repo.query!(""" + UPDATE attempts + SET state = ( + SELECT + DISTINCT ON + (a.id) CASE + WHEN runs.exit_reason = 'success' THEN 'success' + ELSE 'failed' + END + FROM + runs + JOIN attempt_runs ar ON + ar.run_id = runs.id + JOIN attempts a ON + a.id = ar.attempt_id + WHERE + a.id = attempts.id + ORDER BY + a.id ASC, + runs.finished_at DESC + ) + WHERE attempts.state NOT IN ('available', 'claimed', 'started'); + """) + + # Set attempts finished_at by finished at of their last run. + Repo.query!(""" + UPDATE attempts + SET finished_at = ( + SELECT + DISTINCT ON + (a.id) runs.finished_at + FROM + runs + JOIN attempt_runs ar ON + ar.run_id = runs.id + JOIN attempts a ON + a.id = ar.attempt_id + WHERE + a.id = attempts.id + ORDER BY + a.id ASC, + runs.finished_at DESC + ) WHERE attempts.state NOT IN ('available', 'claimed', 'started'); + """) + + # Set attempts error type to a generic, pre v0.10.0 error. + Repo.query!(""" + UPDATE attempts + SET error_type = 'Error' + WHERE error_type IS NULL + AND state = 'failed'; + """) + + # Set runs error type to a generic, pre v0.10.0 error. + Repo.query!(""" + UPDATE runs + SET error_type = 'Error' + WHERE error_type IS NULL + AND exit_reason = 'fail'; + """) + + # Set state for workorders by the state from their last attempts. + Repo.query!(""" + UPDATE work_orders + SET state = ( + SELECT + DISTINCT ON + (a.work_order_id) a.state + FROM + attempts a + WHERE + a.work_order_id = work_orders.id + ORDER BY + a.work_order_id ASC, + a.finished_at DESC + ) + WHERE work_orders.state NOT IN ('available', 'claimed', 'started'); + """) + + # Set last activity for workorders by the last finished_at from attempts. + Repo.query!(""" + UPDATE + work_orders + SET + last_activity = ( + SELECT + DISTINCT ON + (a.work_order_id) a.finished_at + FROM + attempts a + WHERE + a.work_order_id = work_orders.id + ORDER BY + a.work_order_id ASC, + a.finished_at DESC + ) + WHERE work_orders.state NOT IN ('available', 'claimed', 'started'); + """) + end end diff --git a/priv/repo/migrations/20230904113118_trigger_jobs_into_nodes.exs b/priv/repo/migrations/20230904113118_trigger_jobs_into_nodes.exs index 8c78bbae0f..0691ce5b86 100644 --- a/priv/repo/migrations/20230904113118_trigger_jobs_into_nodes.exs +++ b/priv/repo/migrations/20230904113118_trigger_jobs_into_nodes.exs @@ -15,42 +15,6 @@ defmodule Lightning.Repo.Migrations.TriggerJobsIntoNodes do add :created_by_id, references(:users, type: :binary_id, on_delete: :delete_all), null: true end - # Delete attempts that the starting node or dataclip can't easily be found - execute( - """ - WITH flat_attempts AS ( - SELECT a.id, - ir.type, - ir.trigger_id AS starting_trigger_id, - CASE - WHEN ir.trigger_id IS NULL THEN r.job_id - ELSE NULL - END AS starting_job_id, - ir.dataclip_id - FROM attempts a - JOIN invocation_reasons ir ON ir.id = a.reason_id - LEFT JOIN ( - SELECT DISTINCT ON (attempt_id) attempt_id, - run_id - FROM attempt_runs - ORDER BY attempt_id, - inserted_at ASC - ) AS ar - LEFT JOIN runs r ON r.id = ar.run_id ON a.id = ar.attempt_id - ) - DELETE FROM attempts - WHERE id IN - ( - SELECT id - FROM flat_attempts fa - WHERE - (fa.starting_trigger_id IS NULL AND fa.starting_job_id IS NULL) - OR fa.dataclip_id IS NULL - ); - """, - "" - ) - execute """ WITH flat_attempts AS ( SELECT a.id, @@ -60,15 +24,19 @@ defmodule Lightning.Repo.Migrations.TriggerJobsIntoNodes do WHEN ir.trigger_id IS NULL THEN r.job_id ELSE NULL END AS starting_job_id, - ir.dataclip_id + COALESCE(ir.dataclip_id, ir_runs.input_dataclip_id) as dataclip_id FROM attempts a JOIN invocation_reasons ir ON ir.id = a.reason_id + LEFT JOIN runs ir_runs ON ir.run_id = ir_runs.id LEFT JOIN ( SELECT DISTINCT ON (attempt_id) attempt_id, run_id FROM attempt_runs + JOIN attempts ON attempts.id = attempt_runs.attempt_id + JOIN runs ON runs.id = attempt_runs.run_id + WHERE runs.inserted_at >= attempts.inserted_at ORDER BY attempt_id, - inserted_at ASC + attempt_runs.inserted_at ASC ) AS ar LEFT JOIN runs r ON r.id = ar.run_id ON a.id = ar.attempt_id ) diff --git a/priv/repo/migrations/20231005062844_add_attempt_id_to_log_line.exs b/priv/repo/migrations/20231005062844_add_attempt_id_to_log_line.exs index 6932719c1f..2d8f2c9f7c 100644 --- a/priv/repo/migrations/20231005062844_add_attempt_id_to_log_line.exs +++ b/priv/repo/migrations/20231005062844_add_attempt_id_to_log_line.exs @@ -15,8 +15,11 @@ defmodule Lightning.Repo.Migrations.AddAttemptIdToLogLine do execute """ UPDATE log_lines SET attempt_id = ( - SELECT attempt_id FROM attempt_runs - WHERE attempt_runs.run_id = log_lines.run_id + SELECT attempt_id from runs + JOIN attempt_runs ar ON ar.run_id = runs.id + JOIN attempts a ON a.id = ar.attempt_id + WHERE runs.inserted_at >= a.inserted_at + AND runs.id = log_lines.run_id ) """, ""