Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature: CloneToBuf method #17

Merged
merged 7 commits into from
Jan 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions benchmark_opt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,71 @@ import (
"testing"
)

// BenchmarkPrefillNative times one Prefill call with an argument of
// 200_000_000 per benchmark iteration.
//
// Run with:
//
//	go test -v -bench BenchmarkPrefillNative -benchmem -run ^$ github.com/weaviate/sroar -cpuprofile cpu.prof
func BenchmarkPrefillNative(b *testing.B) {
	// Counting down executes the body exactly b.N times, like counting up.
	for remaining := b.N; remaining > 0; remaining-- {
		Prefill(200_000_000)
	}
}

// BenchmarkPrefillFromSortedList times building a bitmap holding every value in
// [0, 200_000_000] by OR-ing together bitmaps created with FromSortedList from
// consecutive chunks of at most 65_536 sorted values.
//
// Run with:
//
//	go test -v -bench BenchmarkPrefillFromSortedList -benchmem -run ^$ github.com/weaviate/sroar -cpuprofile cpu.prof
func BenchmarkPrefillFromSortedList(b *testing.B) {
	const (
		chunkLen = 65_536
		upper    = uint64(200_000_000)
	)
	// The scratch buffer is allocated once and reused for every chunk.
	chunk := make([]uint64, chunkLen)

	for n := 0; n < b.N; n++ {
		merged := NewBitmap()

		for start := uint64(0); start <= upper; start += chunkLen {
			// Fill the chunk with start, start+1, ... and stop early on the
			// last chunk so that no value above upper is written.
			filled := uint64(0)
			for filled < chunkLen && start+filled <= upper {
				chunk[filled] = start + filled
				filled++
			}
			merged.Or(FromSortedList(chunk[:filled]))
		}
	}
}

// BenchmarkFillUpNative times, per iteration, a Prefill call with 100_000_000
// followed by two FillUp calls on the result: first with 150_000_000, then with
// 200_000_000.
//
// Run with:
//
//	go test -v -bench BenchmarkFillUpNative -benchmem -run ^$ github.com/weaviate/sroar -cpuprofile cpu.prof
func BenchmarkFillUpNative(b *testing.B) {
	const (
		prefillTo = 100_000_000
		firstTo   = 150_000_000
		secondTo  = 200_000_000
	)

	for n := 0; n < b.N; n++ {
		filled := Prefill(prefillTo)
		filled.FillUp(firstTo)
		filled.FillUp(secondTo)
	}
}

// BenchmarkFillUpFromSortedList times, per iteration, a Prefill call with
// 100_000_000 followed by extending the result in two stages, first up to
// 150_000_000 and then up to 200_000_000 (both inclusive). Each stage ORs in
// bitmaps created with FromSortedList from consecutive chunks of at most 65_536
// sorted values.
//
// Run with:
//
//	go test -v -bench BenchmarkFillUpFromSortedList -benchmem -run ^$ github.com/weaviate/sroar -cpuprofile cpu.prof
func BenchmarkFillUpFromSortedList(b *testing.B) {
	const (
		prefillBufferSize = 65_536
		prefillX          = uint64(100_000_000)
		fillupX1          = uint64(150_000_000)
		fillupX2          = uint64(200_000_000)
	)
	// The scratch buffer is allocated once and reused for every chunk.
	buf := make([]uint64, prefillBufferSize)

	for n := 0; n < b.N; n++ {
		bm := Prefill(prefillX)

		// Each stage continues right after the previous upper bound, so the
		// stages cover (prefillX, fillupX1] and (fillupX1, fillupX2].
		from := prefillX + 1
		for _, to := range [...]uint64{fillupX1, fillupX2} {
			for start := from; start <= to; start += prefillBufferSize {
				// Stop early on the last chunk so no value above the stage's
				// upper bound is written.
				filled := uint64(0)
				for ; filled < prefillBufferSize && start+filled <= to; filled++ {
					buf[filled] = start + filled
				}
				bm.Or(FromSortedList(buf[:filled]))
			}
			from = to + 1
		}
	}
}

// ================================================================================
//
// BENCHMARKS comparing performance of different merge implementations
Expand Down
28 changes: 20 additions & 8 deletions bitmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,28 +99,40 @@ func NewBitmap() *Bitmap {
}

// NewBitmapWith returns the bitmap produced by newBitmapWith for numKeys, using
// minContainerSize as the initial container size and no additional capacity.
func NewBitmapWith(numKeys int) *Bitmap {
return newBitmapWith(numKeys, minContainerSize, 0)
}

// newBitmapWith allocates a single []uint16 buffer of
// keysLen+initialContainerSize+additionalCapacity elements, where keysLen is
// calcInitialKeysLen(numKeys), and returns the bitmap that newBitampToBuf
// builds on that buffer. It panics when numKeys is less than 2.
//
// NOTE(review): this span is an unmarked diff rendering. The `ra := &Bitmap{...}`
// literal below looks like the removed pre-change code (ra is not used anywhere
// in this function) — confirm against the merged file.
func newBitmapWith(numKeys, initialContainerSize, additionalCapacity int) *Bitmap {
if numKeys < 2 {
panic("Must contain at least two keys.")
}
ra := &Bitmap{
// Each key must also keep an offset. So, we need to double the number
// of uint64s allocated. Plus, we need to make space for the first 2
// uint64s to store the number of keys and node size.
data: make([]uint16, 4*(2*numKeys+2)),
}
keysLen := calcInitialKeysLen(numKeys)
buf := make([]uint16, keysLen+initialContainerSize+additionalCapacity)
return newBitampToBuf(keysLen, initialContainerSize, buf)
}

// newBitampToBuf builds a Bitmap whose data is buf[:keysLen]: it views that
// data as the uint64 key node, records the node size, creates a container for
// key zero, stores the container's offset at indexNodeStart+1 and sets the
// number of keys to 1.
//
// NOTE(review): "Bitamp" is presumably a typo for "Bitmap"; renaming needs every
// caller updated, including any outside this view.
//
// NOTE(review): this span is an unmarked diff rendering. The first of the two
// setNodeSize calls and the first of the two `offset :=` lines look like the
// removed pre-change versions — confirm against the merged file.
func newBitampToBuf(keysLen, initialContainerSize int, buf []uint16) *Bitmap {
ra := &Bitmap{data: buf[:keysLen]}
ra.keys = toUint64Slice(ra.data)
ra.keys.setNodeSize(len(ra.data))
ra.keys.setNodeSize(keysLen)

// Always generate a container for key = 0x00. Otherwise, node gets confused
// about whether a zero key is a new key or not.
offset := ra.newContainer(minContainerSize)
offset := ra.newContainer(uint16(initialContainerSize))
// First two are for num keys. index=2 -> 0 key. index=3 -> offset.
ra.keys.setAt(indexNodeStart+1, offset)
ra.keys.setNumKeys(1)

return ra
}

// calcInitialKeysLen returns 4 * (2*numKeys + 2), the initial length of the
// key section for a bitmap created with room for numKeys keys.
func calcInitialKeysLen(numKeys int) int {
	// Every key is paired with an offset, hence two uint64s per key, and two
	// more uint64s hold the number of keys and the node size.
	uint64Slots := 2*numKeys + 2
	// The result is four times the uint64 count (presumably because the
	// backing slice is []uint16, four elements per uint64).
	return 4 * uint64Slots
}

func (ra *Bitmap) initSpaceForKeys(N int) {
if N == 0 {
return
Expand Down
Loading
Loading