-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindexer.go
152 lines (124 loc) · 3.46 KB
/
indexer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
package ethwal
import (
"cmp"
"context"
"fmt"
"math"
"path"
"sync"
"github.com/0xsequence/ethwal/storage"
"github.com/0xsequence/ethwal/storage/local"
"github.com/RoaringBitmap/roaring/v2/roaring64"
"github.com/c2h5oh/datasize"
"golang.org/x/sync/errgroup"
)
// IndexesDirectory is the directory name that NewIndexer joins onto the
// dataset's full path to form the prefix under which index data is stored.
const IndexesDirectory = ".indexes"
// IndexerOptions carries the settings NewIndexer uses to build an Indexer.
type IndexerOptions[T any] struct {
// Dataset supplies the full path under which the indexes directory is mounted.
Dataset Dataset
// FileSystem is the backing storage; WithDefaults substitutes a local file
// system when it is left unset.
FileSystem storage.FS
// Indexes is the set of indexes the Indexer maintains.
Indexes Indexes[T]
}
// WithDefaults returns a copy of o in which an unset FileSystem has been
// replaced by local.NewLocalFS(""). Fields that are already set are kept.
func (o IndexerOptions[T]) WithDefaults() IndexerOptions[T] {
	// cmp.Or yields its first non-zero argument, so the fallback is only
	// selected when the caller supplied no file system.
	var fallback storage.FS = local.NewLocalFS("")
	o.FileSystem = cmp.Or(o.FileSystem, fallback)
	return o
}
// Indexer runs blocks through a set of indexes, accumulates the resulting
// updates in memory and writes them out when Flush is called.
type Indexer[T any] struct {
// indexes holds the configured indexes, keyed by index name.
indexes map[IndexName]Index[T]
// indexUpdates holds the pending (not yet flushed) update batch per index.
indexUpdates map[IndexName]*IndexUpdate
// fs is the file system mounted at the dataset's indexes directory.
fs storage.FS
// mu guards indexUpdates.
mu sync.Mutex
}
// NewIndexer builds an Indexer from opt.
//
// Unset options are filled in via WithDefaults, the file system is mounted at
// the dataset's indexes directory, and every configured index gets an empty
// pending batch whose LastBlockNum is the value the index reports through
// LastBlockNumIndexed. If that lookup fails for any index, the error is
// wrapped and returned and no Indexer is created.
func NewIndexer[T any](ctx context.Context, opt IndexerOptions[T]) (*Indexer[T], error) {
	// fill in whatever the caller left unset
	opt = opt.WithDefaults()

	// all index data lives below <dataset full path>/<IndexesDirectory>/
	indexesDir := path.Join(opt.Dataset.FullPath(), IndexesDirectory)
	fs := storage.NewPrefixWrapper(opt.FileSystem, fmt.Sprintf("%s/", indexesDir))

	// start each index with an empty batch positioned at its last indexed block
	pending := make(map[IndexName]*IndexUpdate)
	for _, idx := range opt.Indexes {
		last, err := idx.LastBlockNumIndexed(ctx, fs)
		if err != nil {
			return nil, fmt.Errorf("Indexer.NewIndexer: failed to get last block number indexed for %s: %w", idx.Name(), err)
		}

		pending[idx.name] = &IndexUpdate{
			Data:         make(map[IndexedValue]*roaring64.Bitmap),
			LastBlockNum: last,
		}
	}

	indexer := &Indexer[T]{
		indexes:      opt.Indexes,
		indexUpdates: pending,
		fs:           fs,
	}
	return indexer, nil
}
// Index runs block through every configured index and merges each resulting
// update into that index's pending batch. Nothing is written to storage here;
// the accumulated batches are persisted by Flush.
//
// An index that yields a nil update for the block is skipped. If an index
// fails, the error is wrapped with the index name and returned immediately;
// updates already merged for indexes visited earlier in the same call are
// kept.
func (i *Indexer[T]) Index(ctx context.Context, block Block[T]) error {
	for _, index := range i.indexes {
		// IndexBlock runs outside the lock; only the merge below touches
		// shared state.
		bmUpdate, err := index.IndexBlock(ctx, i.fs, block)
		if err != nil {
			return fmt.Errorf("Indexer.Index: failed to index block for %s: %w", index.Name(), err)
		}
		if bmUpdate == nil {
			continue
		}

		// The batch is held by pointer, so merging in place is sufficient and
		// there is no need to write it back into the map.
		i.mu.Lock()
		i.indexUpdates[index.name].Merge(bmUpdate)
		i.mu.Unlock()
	}
	return nil
}
// EstimatedBatchSize returns the sum of GetSizeInBytes over every bitmap held
// in the pending update batches. It takes the Indexer's lock while summing.
func (i *Indexer[T]) EstimatedBatchSize() datasize.ByteSize {
	i.mu.Lock()
	defer i.mu.Unlock()

	total := datasize.ByteSize(0)
	for _, batch := range i.indexUpdates {
		for _, bitmap := range batch.Data {
			total += datasize.ByteSize(bitmap.GetSizeInBytes())
		}
	}
	return total
}
// Flush hands every pending batch to its index's Store method, running the
// stores through an errgroup and waiting for all of them to finish.
//
// If any store fails, the first error is wrapped and returned and the pending
// batches are left as they were. On success each batch's Data is replaced
// with a fresh empty map; LastBlockNum is not reset. The Indexer's lock is
// held for the whole call.
func (i *Indexer[T]) Flush(ctx context.Context) error {
	i.mu.Lock()
	defer i.mu.Unlock()

	group, groupCtx := errgroup.WithContext(ctx)
	for name, batch := range i.indexUpdates {
		// batches with no matching index are not stored
		if target, found := i.indexes[name]; found {
			group.Go(func() error {
				return target.Store(groupCtx, i.fs, batch)
			})
		}
	}
	if err := group.Wait(); err != nil {
		return fmt.Errorf("Indexer.Flush: failed to flush indexes: %w", err)
	}

	// everything was stored: start the next batch from empty data
	for _, index := range i.indexes {
		i.indexUpdates[index.name].Data = make(map[IndexedValue]*roaring64.Bitmap)
	}
	return nil
}
// BlockNum reports the smallest LastBlockNum across all pending index
// batches, i.e. the block number up to which every index has been indexed.
// It returns 0 when there are no indexes or when no value below
// math.MaxUint64 was found. Callers can use it to decide where a new Indexer
// should resume.
func (i *Indexer[T]) BlockNum() uint64 {
	i.mu.Lock()
	defer i.mu.Unlock()

	// math.MaxUint64 doubles as the "nothing seen yet" sentinel
	lowest := uint64(math.MaxUint64)
	for _, batch := range i.indexUpdates {
		if n := batch.LastBlockNum; n < lowest {
			lowest = n
		}
	}

	if lowest != math.MaxUint64 {
		return lowest
	}
	return 0
}
// Close flushes all pending index updates and returns Flush's result.
func (i *Indexer[T]) Close(ctx context.Context) error {
	err := i.Flush(ctx)
	return err
}