sql: add copy-based migrations and VACUUM INTO (#6085)
-------
## Motivation

Currently, when vacuuming is required after a database migration (according to the configured `db-vacuum-state` value), a plain SQL `VACUUM` command is used. According to the [description](https://www.sqlite.org/lang_vacuum.html#how_vacuum_works) in the SQLite docs, `VACUUM` requires as much as twice the size of the original database file in free disk space: it first makes a copy of the database in the temporary directory and then copies the vacuumed database back into the original file, using a WAL file that grows to the size of the database itself before the changes are committed.
As a [remark in the SQLite source](https://github.com/sqlite/sqlite/blob/105c20648e1b05839fd0638686b95f2e3998abcb/src/vacuum.c#L97-L103) notes:

```
** Only 1x temporary space and only 1x writes would be required if
** the copy of step (3) were replaced by deleting the original database
** and renaming the transient database as the original.  But that will
** not work if other processes are attached to the original database.
** And a power loss in between deleting the original and renaming the
** transient would cause the database file to appear to be deleted
** following reboot.
```

In the case of go-spacemesh we don't need concurrent access to the database from multiple processes, so we can optimize the vacuuming step to use only 1x the size of the original database: `VACUUM INTO` produces a vacuumed copy of the database with no space requirements beyond the size of the copy itself. Moreover, we can run the migrations against that temporary copy faster, because `PRAGMA journal_mode=OFF` and `PRAGMA synchronous=OFF` can be used on it safely: if something goes wrong during migration, the copy is simply dropped. A rough sketch of this flow is shown below.
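
The sketch below is a minimal, hypothetical illustration of that flow in Go, assuming the `mattn/go-sqlite3` driver and a placeholder `migrate` function; it is not the actual go-spacemesh implementation.

```go
package dbvacuum

import (
	"database/sql"
	"fmt"
	"os"

	_ "github.com/mattn/go-sqlite3" // assumed driver, not necessarily the one used by go-spacemesh
)

// migrate is a placeholder for the real schema-migration logic.
func migrate(db *sql.DB) error { return nil }

// vacuumAndMigrate makes a vacuumed copy of the database at path, migrates
// the copy with journaling and syncing disabled, and then renames the copy
// over the original. Safe only when no other process has the database open.
func vacuumAndMigrate(path string) error {
	tmpPath := path + ".vacuum" // illustrative temp location next to the original

	src, err := sql.Open("sqlite3", "file:"+path)
	if err != nil {
		return fmt.Errorf("open source db: %w", err)
	}
	// VACUUM INTO writes a defragmented copy, needing only ~1x the database
	// size in extra space. (Quoting of the path is simplified for brevity.)
	_, err = src.Exec(fmt.Sprintf("VACUUM INTO '%s'", tmpPath))
	src.Close()
	if err != nil {
		return fmt.Errorf("vacuum into %s: %w", tmpPath, err)
	}

	tmp, err := sql.Open("sqlite3", "file:"+tmpPath)
	if err != nil {
		return fmt.Errorf("open temp copy: %w", err)
	}
	tmp.SetMaxOpenConns(1) // PRAGMAs are per-connection, so keep a single one
	// The copy is disposable: on any failure, delete it and keep the original untouched.
	fail := func(err error) error {
		tmp.Close()
		os.Remove(tmpPath)
		return err
	}
	for _, pragma := range []string{"PRAGMA journal_mode=OFF", "PRAGMA synchronous=OFF"} {
		if _, err := tmp.Exec(pragma); err != nil {
			return fail(fmt.Errorf("%s: %w", pragma, err))
		}
	}
	if err := migrate(tmp); err != nil {
		return fail(fmt.Errorf("migrate temp copy: %w", err))
	}
	if err := tmp.Close(); err != nil {
		return fmt.Errorf("close temp copy: %w", err)
	}
	// Replace the original with the migrated, vacuumed copy.
	return os.Rename(tmpPath, path)
}
```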

On top of requiring 2x space, a normal `VACUUM` (without `INTO`) has another problem: a very slow last step in which the original database is replaced. Let's compare `VACUUM` with `VACUUM INTO` on a Mac M3 Max laptop:

```console
$ ls -lh /tmp/state.sql*
total 164511960
-rw-r--r--  1 ivan4th  staff    78G Jun 28 00:06 state.sql
-rw-r--r--  1 ivan4th  staff    32K Jun 28 01:32 state.sql-shm
-rw-r--r--  1 ivan4th  staff     0B Jun 28 00:06 state.sql-wal

$ time sqlite3 ~/rmme/sm-data/state.sql vacuum
real    67m48.211s
user    57m13.518s
sys     4m41.778s

$ time sqlite3 ~/rmme/sm-data/state.sql "vacuum into '/tmp/state.sql'"
real    2m27.813s
user    0m43.039s
sys     0m56.265s
```

As can be seen, `VACUUM INTO` is about 27 times faster on this particular machine.

Another problem, described in #6069, is that on Linux systems the `$TMP` directory is used during a normal `VACUUM`, and if it is a RAM disk, the vacuuming process may run out of space.
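
For reference, SQLite on Unix consults the `SQLITE_TMPDIR` and `TMPDIR` environment variables before falling back to directories such as `/var/tmp` and `/tmp`, so a plain `VACUUM` can be redirected to a disk-backed location; `VACUUM INTO` sidesteps the issue entirely because the target path is explicit. A hypothetical example (paths are illustrative):

```console
# Redirect plain VACUUM's temporary files away from a RAM-backed $TMP:
$ SQLITE_TMPDIR=/var/tmp sqlite3 ~/rmme/sm-data/state.sql vacuum

# VACUUM INTO writes only to the explicitly given path:
$ sqlite3 ~/rmme/sm-data/state.sql "vacuum into '/data/state-vacuumed.sql'"
```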
ivan4th committed Aug 22, 2024
1 parent 9d39d6c commit e8cd90a
Showing 10 changed files with 997 additions and 93 deletions.
2 changes: 1 addition & 1 deletion checkpoint/recovery.go
@@ -276,7 +276,7 @@ func RecoverFromLocalFile(

newDB, err := statesql.Open("file:" + cfg.DbPath())
if err != nil {
return nil, fmt.Errorf("creating new DB: %w", err)
return nil, fmt.Errorf("create new db: %w", err)
}
defer newDB.Close()
logger.Info("populating new database",
2 changes: 1 addition & 1 deletion fetch/handler_test.go
@@ -330,7 +330,7 @@ func TestHandleEpochInfoReq(t *testing.T) {
var resp server.Response
require.NoError(t, codec.Decode(b.Bytes(), &resp))
require.Empty(t, resp.Data)
require.Contains(t, resp.Error, "exec epoch 11: database: no free connection")
require.Contains(t, resp.Error, "exec epoch 11: database closed")
})
})
}
22 changes: 11 additions & 11 deletions fetch/p2p_test.go
@@ -270,7 +270,7 @@ func forStreamingCachedUncached(

func TestP2PPeerEpochInfo(t *testing.T) {
forStreamingCachedUncached(
t, "peer error: getting ATX IDs: exec epoch 11: database: no free connection",
t, "peer error: getting ATX IDs: exec epoch 11: database closed",
func(t *testing.T, ctx context.Context, tpf *testP2PFetch, errStr string) {
epoch := types.EpochID(11)
atxIDs := tpf.createATXs(epoch)
@@ -291,7 +291,7 @@ func TestP2PPeerEpochInfo(t *testing.T) {

func TestP2PPeerMeshHashes(t *testing.T) {
forStreaming(
t, "peer error: get aggHashes from 7 to 23 by 5: database: no free connection", false,
t, "peer error: get aggHashes from 7 to 23 by 5: database closed", false,
func(t *testing.T, ctx context.Context, tpf *testP2PFetch, errStr string) {
req := &MeshHashRequest{
From: 7,
@@ -324,7 +324,7 @@ func TestP2PPeerMeshHashes(t *testing.T) {

func TestP2PMaliciousIDs(t *testing.T) {
forStreaming(
t, "database: no free connection", false,
t, "database closed", false,
func(t *testing.T, ctx context.Context, tpf *testP2PFetch, errStr string) {
var bad []types.NodeID
for i := 0; i < 11; i++ {
@@ -349,7 +349,7 @@ func TestP2PMaliciousIDs(t *testing.T) {

func TestP2PGetATXs(t *testing.T) {
forStreamingCachedUncached(
t, "database: no free connection",
t, "database closed",
func(t *testing.T, ctx context.Context, tpf *testP2PFetch, errStr string) {
epoch := types.EpochID(11)
atx := newAtx(tpf.t, epoch)
@@ -365,7 +365,7 @@ func TestP2PGetATXs(t *testing.T) {

func TestP2PGetPoet(t *testing.T) {
forStreaming(
t, "database: no free connection", false,
t, "database closed", false,
func(t *testing.T, ctx context.Context, tpf *testP2PFetch, errStr string) {
ref := types.PoetProofRef{0x42, 0x43}
require.NoError(t, poets.Add(tpf.serverCDB, ref, []byte("proof1"), []byte("sid1"), "rid1"))
@@ -380,7 +380,7 @@ func TestP2PGetPoet(t *testing.T) {

func TestP2PGetBallot(t *testing.T) {
forStreaming(
t, "database: no free connection", false,
t, "database closed", false,
func(t *testing.T, ctx context.Context, tpf *testP2PFetch, errStr string) {
signer, err := signing.NewEdSigner()
require.NoError(t, err)
@@ -402,7 +402,7 @@ func TestP2PGetBallot(t *testing.T) {

func TestP2PGetActiveSet(t *testing.T) {
forStreamingCachedUncached(
t, "database: no free connection",
t, "database closed",
func(t *testing.T, ctx context.Context, tpf *testP2PFetch, errStr string) {
id := types.RandomHash()
set := &types.EpochActiveSet{
@@ -421,7 +421,7 @@ func TestP2PGetActiveSet(t *testing.T) {

func TestP2PGetBlock(t *testing.T) {
forStreaming(
t, "database: no free connection", false,
t, "database closed", false,
func(t *testing.T, ctx context.Context, tpf *testP2PFetch, errStr string) {
lid := types.LayerID(111)
bk := types.NewExistingBlock(types.RandomBlockID(), types.InnerBlock{LayerIndex: lid})
@@ -472,7 +472,7 @@ func TestP2PGetProp(t *testing.T) {

func TestP2PGetBlockTransactions(t *testing.T) {
forStreaming(
t, "database: no free connection", false,
t, "database closed", false,
func(t *testing.T, ctx context.Context, tpf *testP2PFetch, errStr string) {
signer, err := signing.NewEdSigner()
require.NoError(t, err)
@@ -488,7 +488,7 @@ func TestP2PGetBlockTransactions(t *testing.T) {

func TestP2PGetProposalTransactions(t *testing.T) {
forStreaming(
t, "database: no free connection", false,
t, "database closed", false,
func(t *testing.T, ctx context.Context, tpf *testP2PFetch, errStr string) {
signer, err := signing.NewEdSigner()
require.NoError(t, err)
@@ -506,7 +506,7 @@ func TestP2PGetProposalTransactions(t *testing.T) {

func TestP2PGetMalfeasanceProofs(t *testing.T) {
forStreaming(
t, "database: no free connection", false,
t, "database closed", false,
func(t *testing.T, ctx context.Context, tpf *testP2PFetch, errStr string) {
nid := types.RandomNodeID()
proof := types.RandomBytes(11)
4 changes: 2 additions & 2 deletions node/node.go
@@ -1966,7 +1966,7 @@ func (app *App) setupDBs(ctx context.Context, lg log.Log) error {
}
sqlDB, err := statesql.Open("file:"+filepath.Join(dbPath, dbFile), dbopts...)
if err != nil {
return fmt.Errorf("open sqlite db %w", err)
return fmt.Errorf("open sqlite db: %w", err)
}
app.db = sqlDB
if app.Config.CollectMetrics && app.Config.DatabaseSizeMeteringInterval != 0 {
@@ -2012,7 +2012,7 @@ func (app *App) setupDBs(ctx context.Context, lg log.Log) error {
sql.WithAllowSchemaDrift(app.Config.DatabaseSchemaAllowDrift),
)
if err != nil {
return fmt.Errorf("open sqlite db %w", err)
return fmt.Errorf("open sqlite db: %w", err)
}
app.localDB = localDB
return nil