Skip to content

Commit

Permalink
add cleanup db command
Browse files Browse the repository at this point in the history
Add a "cleanup db" command.
The commands removes all tasks runs that have been started before a
given timestamp.
When run it prints what it will do and gives the user 5sec to abort.
This can be skipped by passing the --force parameter.
It prints the number of deleted records.

--pretend can be passed to not do anything and only print the number of
records that would be deleted.
  • Loading branch information
fho committed Jun 21, 2024
1 parent 9563e59 commit bdd7988
Show file tree
Hide file tree
Showing 6 changed files with 494 additions and 1 deletion.
12 changes: 12 additions & 0 deletions internal/command/cleanup.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package command

import "github.com/spf13/cobra"

var cleanupCmd = &cobra.Command{
Use: "cleanup",
Short: "delete old database records",
}

func init() {
rootCmd.AddCommand(cleanupCmd)
}
131 changes: 131 additions & 0 deletions internal/command/cleanup_db.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
package command

import (
"fmt"
"strings"
"time"

"github.com/simplesurance/baur/v4/internal/command/flag"
"github.com/simplesurance/baur/v4/internal/command/term"

"github.com/spf13/cobra"
)

type cleanupDbCmd struct {
cobra.Command
taskRunsBefore flag.DateTimeFlagValue
force bool
pretend bool
}

const timeFormat = "02 Jan 06 15:04:05 MST"
const cleanupDbGracetime = time.Second * 5

var cleanupDbLongHelp = fmt.Sprintf(`
Delete old data from the baur database.
The command deletes information about tasks runs that started to run before
a given date from the database. It also removes records that became
dangling because all task runs referencing them were deleted.
Task runs that are referenced by a release are not deleted.
The command can be run without access to the baur repository by specifying the
PostgreSQL URI via the environment variable %s.
`,
term.Highlight(envVarPSQLURL),
)

const cleanupDbCmdExample = `
baur cleanup db --pretend --task-runs-before=2023.06.01-13:30
`

func init() {
cleanupCmd.AddCommand(&newCleanupDbCmd().Command)
}

func newCleanupDbCmd() *cleanupDbCmd {
cmd := cleanupDbCmd{
Command: cobra.Command{
Args: cobra.NoArgs,
Use: "db --task-runs-before=DATETIME",
Long: strings.TrimSpace(cleanupDbLongHelp),
Example: strings.TrimSpace(cleanupDbCmdExample),
},
}

cmd.Flags().Var(&cmd.taskRunsBefore, "task-runs-before",
fmt.Sprintf(
"delete tasks that ran before DATETIME\nFormat: %s",
term.Highlight(flag.DateTimeFormatDescr),
),
)

cmd.Flags().BoolVarP(&cmd.pretend, "pretend", "p", false,
"do not delete anything, only pretend how many records would be deleted",
)

cmd.Flags().BoolVarP(&cmd.force, "force", "f", false,
fmt.Sprintf(
"do not wait %s seconds before starting deletion, delete immediately",
cleanupDbGracetime,
),
)

if err := cmd.MarkFlagRequired("task-runs-before"); err != nil {
panic(err)
}

cmd.Run = cmd.run

return &cmd
}

func (c *cleanupDbCmd) run(cmd *cobra.Command, _ []string) {
var op string
if c.pretend {
op = term.Highlight("pretending to delete")
} else {
op = term.Highlight("deleting")
}
stdout.Printf(
"%s tasks runs older then %s and dangling records,\n"+
"tasks runs referenced by releases are kept\n",
op,
term.Highlight(c.taskRunsBefore.Format(timeFormat)),
)

if !c.force {
stdout.Printf("starting in %s seconds, press %s to abort\n",
term.Highlight(cleanupDbGracetime), term.Highlight("CTRL+C"))
time.Sleep(cleanupDbGracetime)
stdout.Println("starting deleting...")
}

psqlURL, err := postgresqlURL()
exitOnErr(err)

storageClt := mustNewCompatibleStorage(psqlURL)
startTime := time.Now()
result, err := storageClt.TaskRunsDelete(cmd.Context(), c.taskRunsBefore.Time, c.pretend)
exitOnErr(err)

stdout.Printf(
"\n"+
"deletion %s in %s, deleted records:\n"+
"%-16s %s\n"+
"%-16s %s\n"+
"%-16s %s\n"+
"%-16s %s\n"+
"%-16s %s\n"+
"%-16s %s\n"+
"%-16s %s\n",
term.GreenHighlight("successful"),
term.FormatDuration(time.Since(startTime)),
"Task Runs:", term.Highlight(result.DeletedTaskRuns),
"Tasks:", term.Highlight(result.DeletedTasks),
"Apps:", term.Highlight(result.DeletedApps),
"Inputs:", term.Highlight(result.DeletedInputs),
"Outputs:", term.Highlight(result.DeletedOutputs),
"Uploads:", term.Highlight(result.DeletedUploads),
"VCSs:", term.Highlight(result.DeletedVCS),
)
}
2 changes: 1 addition & 1 deletion internal/command/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ func getPSQLURIEnv() string {
}

// postgresqlURL returns the value of the environment variable [envVarPSQLURL],
// if is set.
// if set.
// Otherwise it searches for a baur repository and returns the postgresql url
// from the repository config.
// If the repository object is needed, use [mustNewCompatibleStorageRepo]
Expand Down
204 changes: 204 additions & 0 deletions pkg/storage/postgres/delete.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
package postgres

import (
"context"
"errors"
"time"

"github.com/jackc/pgx/v4"

"github.com/simplesurance/baur/v4/pkg/storage"
)

func (c *Client) TaskRunsDelete(ctx context.Context, before time.Time, pretend bool) (*storage.TaskRunsDeleteResult, error) {
var result storage.TaskRunsDeleteResult

err := c.db.BeginFunc(ctx, func(tx pgx.Tx) error {
var err error
if pretend {
defer tx.Rollback(ctx) //nolint: errcheck
}

result.DeletedTaskRuns, err = c.deleteUnusedTaskRuns(ctx, tx, before)
if err != nil {
return err
}

result.DeletedTasks, err = c.deleteUnusedTasks(ctx, tx)
if err != nil {
return err
}

result.DeletedApps, err = c.deleteUnusedApps(ctx, tx)
if err != nil {
return err
}

result.DeletedOutputs, err = c.deleteUnusedOutputs(ctx, tx)
if err != nil {
return err
}

result.DeletedUploads, err = c.deleteUnusedUploads(ctx, tx)
if err != nil {
return err
}

result.DeletedInputs, err = c.deleteUnusedInputs(ctx, tx)
if err != nil {
return err
}

result.DeletedVCS, err = c.deleteUnusedVCS(ctx, tx)
if err != nil {
return err
}

return nil
})

if err != nil && !(pretend && errors.Is(err, pgx.ErrTxClosed)) {
return nil, err
}

return &result, nil
}

func (*Client) deleteUnusedTaskRuns(ctx context.Context, tx pgx.Tx, before time.Time) (int64, error) {
const query = `
DELETE FROM task_run
WHERE start_timestamp < $1
AND task_run.id NOT IN (
SELECT task_run_id FROM release_task_run
)
`

t, err := tx.Exec(ctx, query, before)
if err != nil {
return 0, newQueryError(query, err, before)
}

return t.RowsAffected(), nil
}

func (*Client) deleteUnusedTasks(ctx context.Context, tx pgx.Tx) (int64, error) {
const query = `
DELETE FROM task
WHERE id NOT IN (
SELECT task_run.task_id FROM task_run
)
`
t, err := tx.Exec(ctx, query)
if err != nil {
return 0, newQueryError(query, err)
}

return t.RowsAffected(), nil
}

func (*Client) deleteUnusedApps(ctx context.Context, tx pgx.Tx) (int64, error) {
const query = `
DELETE FROM application
WHERE id NOT IN (
SELECT task.application_id FROM task
)
`
t, err := tx.Exec(ctx, query)
if err != nil {
return 0, newQueryError(query, err)
}

return t.RowsAffected(), nil
}

func (*Client) deleteUnusedOutputs(ctx context.Context, tx pgx.Tx) (int64, error) {
const query = `
DELETE FROM output
WHERE id NOT IN (
SELECT task_run_output.output_id
FROM task_run_output
)
`
t, err := tx.Exec(ctx, query)
if err != nil {
return 0, newQueryError(query, err)
}

return t.RowsAffected(), nil
}

func (*Client) deleteUnusedUploads(ctx context.Context, tx pgx.Tx) (int64, error) {
const query = `
DELETE FROM upload
WHERE id NOT IN (
SELECT task_run_output.upload_id
FROM task_run_output
)
`
t, err := tx.Exec(ctx, query)
if err != nil {
return 0, newQueryError(query, err)
}

return t.RowsAffected(), nil
}

func (*Client) deleteUnusedVCS(ctx context.Context, tx pgx.Tx) (int64, error) {
const query = `
DELETE FROM vcs
WHERE id NOT IN (
SELECT task_run.vcs_id
FROM task_run
)
`
t, err := tx.Exec(ctx, query)
if err != nil {
return 0, newQueryError(query, err)
}

return t.RowsAffected(), nil
}

func (*Client) deleteUnusedInputs(ctx context.Context, tx pgx.Tx) (int64, error) {
var cnt int64
const qInputFiles = `
DELETE FROM input_file
WHERE id NOT IN (
SELECT task_run_file_input.input_file_id
FROM task_run_file_input
)
`
t, err := tx.Exec(ctx, qInputFiles)
if err != nil {
return 0, newQueryError(qInputFiles, err)
}
cnt = t.RowsAffected()

const qInputStrings = `
DELETE FROM input_string
WHERE id NOT IN (
SELECT task_run_string_input.input_string_id
FROM task_run_string_input
)
`
t, err = tx.Exec(ctx, qInputStrings)
if err != nil {
return 0, newQueryError(qInputFiles, err)
}
cnt += t.RowsAffected()

const qInputTasks = `
DELETE FROM input_task
WHERE id NOT IN (
SELECT task_run_task_input.input_task_id
FROM task_run_task_input
)
`
t, err = tx.Exec(ctx, qInputTasks)
if err != nil {
return 0, newQueryError(qInputFiles, err)
}
cnt += t.RowsAffected()

return cnt, nil
}
Loading

0 comments on commit bdd7988

Please sign in to comment.