Skip to content

Commit

Permalink
feature: introduces Prefill method
Browse files Browse the repository at this point in the history
  • Loading branch information
aliszka committed Dec 13, 2024
1 parent 1db0927 commit 331406e
Show file tree
Hide file tree
Showing 3 changed files with 138 additions and 0 deletions.
27 changes: 27 additions & 0 deletions benchmark_opt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,33 @@ import (
"testing"
)

// BenchmarkPrefillNative measures how long Prefill takes to build a bitmap
// holding every value in [0, 200_000_000].
//
// go test -v -bench BenchmarkPrefillNative -benchmem -run ^$ github.com/weaviate/sroar -cpuprofile cpu.prof
func BenchmarkPrefillNative(b *testing.B) {
	const upper = 200_000_000

	for iter := 0; iter < b.N; iter++ {
		Prefill(upper)
	}
}

// BenchmarkPrefillFromSortedList measures building a bitmap of every value in
// [0, 200_000_000] without Prefill: the range is walked in chunks of 65_536
// consecutive values, each chunk is turned into a bitmap with FromSortedList
// and OR-ed into the result.
//
// go test -v -bench BenchmarkPrefillFromSortedList -benchmem -run ^$ github.com/weaviate/sroar -cpuprofile cpu.prof
func BenchmarkPrefillFromSortedList(b *testing.B) {
	const chunkLen = 65_536
	const upper = uint64(200_000_000)

	// One scratch buffer is reused for every chunk of every iteration.
	chunk := make([]uint64, chunkLen)

	for iter := 0; iter < b.N; iter++ {
		result := NewBitmap()

		for base := uint64(0); base <= upper; base += chunkLen {
			// Fill the buffer with consecutive values starting at base,
			// stopping early when the upper bound is reached.
			filled := uint64(0)
			for filled < chunkLen && base+filled <= upper {
				chunk[filled] = base + filled
				filled++
			}
			result.Or(FromSortedList(chunk[:filled]))
		}
	}
}

// ================================================================================
//
// BENCHMARKS comparing performance of different merge implementations
Expand Down
85 changes: 85 additions & 0 deletions bitmap_opt.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package sroar

import (
"fmt"
"math"
"sync"
)

Expand Down Expand Up @@ -706,3 +707,87 @@ func (ra *Bitmap) CloneToBuf(buf []byte) *Bitmap {
bm.data = bm.data[:srclen/2]
return bm
}

// Prefill returns a new bitmap that contains every value in the inclusive
// range [0, maxX].
//
// The range is split into containers of maxCardinality values each. Every
// completely covered container is written as a bitmap container with all bits
// set: the first one is filled word by word and the others are copies of it.
// One more bitmap container holds the values that are left over; it stays
// empty when the range ends exactly on a container boundary.
func Prefill(maxX uint64) *Bitmap {
	perContainer := uint64(maxCardinality)

	// The range holds maxX+1 values. Derive the number of completely filled
	// containers (fullCount) and the number of values in the last container
	// (tailCard) without computing maxX+1 itself.
	fullCount := maxX / perContainer
	tailCard := maxX % perContainer
	if tailCard == perContainer-1 {
		// maxX is the last value of its container, so that container is full.
		fullCount++
	}
	tailCard = (tailCard + 1) % perContainer

	// Besides the full containers there is one container for the leftover
	// values (possibly empty), plus one spare key so the keys do not have to
	// expand (there should always be one spare).
	bm := newBitmapWithSize(int(fullCount)+2, maxContainerSize, int(fullCount)*maxContainerSize)

	// full stays nil when there is no completely filled container.
	var full []uint16
	tailOffset := bm.keys.val(0)

	if fullCount > 0 {
		// The container already present in the new bitmap becomes the
		// reference "all ones" container.
		full = bm.getContainer(tailOffset)
		full[indexSize] = maxContainerSize
		full[indexType] = typeBitmap
		setCardinality(full, maxCardinality)

		// Set every bit of the payload, 64 bits at a time.
		fullWords := uint16To64SliceUnsafe(full[startIdx:])
		for w := 0; w < len(fullWords); w++ {
			fullWords[w] = math.MaxUint64
		}

		// Every further full container is a plain copy of the reference one,
		// header included.
		for c := uint64(1); c < fullCount; c++ {
			off := bm.newContainerNoClr(maxContainerSize)
			bm.setKey((c*perContainer)&mask, off)

			copy(bm.data[off:], full)
		}

		// The leftover values go into a fresh container placed after the
		// full ones.
		tailOffset = bm.newContainer(maxContainerSize)
		bm.setKey((fullCount*perContainer)&mask, tailOffset)
	}

	tail := bm.getContainer(tailOffset)
	tail[indexSize] = maxContainerSize
	tail[indexType] = typeBitmap
	setCardinality(tail, int(tailCard))

	if tailCard == 0 {
		return bm
	}

	// The payload is addressed in uint16 words of 16 values each:
	// wholeWords words are completely set, looseBits values spill into the
	// following word.
	wholeWords := uint16(tailCard) / 16
	looseBits := uint16(tailCard) % 16

	if full != nil {
		// A reference container exists (maxX >= maxCardinality-1): take the
		// completely set words straight from it.
		copy(bm.data[tailOffset+uint64(startIdx):], full[startIdx:startIdx+wholeWords])
	} else {
		// No reference container (maxX < maxCardinality-1): write 64-bit
		// words first, then the 16-bit words that do not fill another
		// 64-bit word.
		wholeQuads := uint16(tailCard) / 64

		tail64 := uint16To64SliceUnsafe(tail[startIdx:])
		for q := uint16(0); q < wholeQuads; q++ {
			tail64[q] = math.MaxUint64
		}
		// One 64-bit word covers four 16-bit words.
		for w := wholeQuads * 4; w < wholeWords; w++ {
			tail[startIdx+w] = math.MaxUint16
		}
	}

	// Set the remaining values one by one in the first not completely set word.
	for bit := uint16(0); bit < looseBits; bit++ {
		tail[startIdx+wholeWords] |= bitmapMask[bit]
	}

	return bm
}
26 changes: 26 additions & 0 deletions bitmap_opt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1106,6 +1106,32 @@ func TestCloneToBuf(t *testing.T) {
})
}

// TestPrefill checks Prefill for a set of upper bounds: small values, an
// arbitrary mid-range value, and values directly below, at and above one and
// three multiples of maxCardinality.
func TestPrefill(t *testing.T) {
	upperBounds := []int{
		0,
		1,
		123_456,
		maxCardinality / 2,
		maxCardinality - 1,
		maxCardinality,
		maxCardinality + 1,
		maxCardinality*3 - 1,
		maxCardinality * 3,
		maxCardinality*3 + 1,
	}

	for _, upper := range upperBounds {
		name := fmt.Sprintf("value %d", upper)
		t.Run(name, func(t *testing.T) {
			prefilled := Prefill(uint64(upper))

			assertPrefilled(t, prefilled, upper)
		})
	}
}

// assertPrefilled requires that bm reports a cardinality of maxX+1 and that
// its array form is exactly 0, 1, ..., maxX in that order.
func assertPrefilled(t *testing.T, bm *Bitmap, maxX int) {
	wantCount := maxX + 1
	require.Equal(t, wantCount, bm.GetCardinality())

	values := bm.ToArray()
	require.Len(t, values, wantCount)

	// The value at every position must equal the position itself.
	for idx := 0; idx < len(values); idx++ {
		require.Equal(t, uint64(idx), values[idx])
	}
}

func TestMergeToSuperset(t *testing.T) {
run := func(t *testing.T, bufs [][]uint16) {
containerThreshold := uint64(math.MaxUint16 + 1)
Expand Down

0 comments on commit 331406e

Please sign in to comment.