From 5124bc4ab6c2077864241f37e7a1cc688187b037 Mon Sep 17 00:00:00 2001 From: Andrzej Liszka Date: Tue, 19 Nov 2024 14:49:11 +0100 Subject: [PATCH] performance: Or method and function - allocate memory once for all containers copied from src bitmap --- bitmap.go | 38 +++++++++++----- bitmap_opt.go | 124 ++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 118 insertions(+), 44 deletions(-) diff --git a/bitmap.go b/bitmap.go index c71da8f..8a39029 100644 --- a/bitmap.go +++ b/bitmap.go @@ -152,14 +152,21 @@ func (ra *Bitmap) setKey(k uint64, offset uint64) uint64 { } // ra.keys is full. We should expand its size. + bySize := ra.expandKeys(0) + return offset + bySize +} + +func (ra *Bitmap) expandKeys(bySize uint64) uint64 { curSize := uint64(len(ra.keys) * 4) // Multiply by 4 for U64 -> U16. - bySize := curSize + if bySize == 0 { + bySize = curSize + } if bySize > math.MaxUint16 { bySize = math.MaxUint16 } ra.scootRight(curSize, bySize) - ra.keys = toUint64Slice(ra.data[:curSize+bySize]) + ra.keys = uint16To64SliceUnsafe(ra.data[:curSize+bySize]) ra.keys.setNodeSize(int(curSize + bySize)) // All containers have moved to the right by bySize bytes. @@ -171,31 +178,35 @@ func (ra *Bitmap) setKey(k uint64, offset uint64) uint64 { n.setAt(valOffset(i), val+uint64(bySize)) } } - return offset + bySize + return bySize } func (ra *Bitmap) fastExpand(bySize uint64) { - prev := len(ra.keys) * 4 // Multiply by 4 to convert from u16 to u64. + toSize := ra.expandNoLengthChange(bySize) + ra.data = ra.data[:toSize] +} +func (ra *Bitmap) expandNoLengthChange(bySize uint64) (toSize int) { // This following statement also works. But, given how much fastExpand gets // called (a lot), probably better to control allocation. // ra.data = append(ra.data, empty[:bySize]...) 
- toSize := len(ra.data) + int(bySize) + toSize = len(ra.data) + int(bySize) if toSize <= cap(ra.data) { - ra.data = ra.data[:toSize] return } growBy := cap(ra.data) if growBy < int(bySize) { growBy = int(bySize) } - out := make([]uint16, cap(ra.data)+growBy) + out := make([]uint16, len(ra.data), cap(ra.data)+growBy) copy(out, ra.data) - ra.data = out[:toSize] + prev := len(ra.keys) * 4 // Multiply by 4 to convert from u16 to u64. + ra.data = out ra._ptr = nil // Allow Go to GC whatever this was pointing to. // Re-reference ra.keys correctly because underlying array has changed. - ra.keys = toUint64Slice(ra.data[:prev]) + ra.keys = uint16To64SliceUnsafe(ra.data[:prev]) + return } // scootRight isn't aware of containers. It's going to create empty space of @@ -221,10 +232,15 @@ func (ra *Bitmap) scootLeft(offset uint64, size uint64) { } func (ra *Bitmap) newContainer(sz uint16) uint64 { + offset := ra.newContainerNoClr(sz) + ra.data[offset] = sz + Memclr(ra.data[offset+1 : offset+uint64(sz)]) + return offset +} + +func (ra *Bitmap) newContainerNoClr(sz uint16) uint64 { offset := uint64(len(ra.data)) ra.fastExpand(uint64(sz)) - Memclr(ra.data[offset : offset+uint64(sz)]) - ra.data[offset] = sz return offset } diff --git a/bitmap_opt.go b/bitmap_opt.go index 4d301c2..248046e 100644 --- a/bitmap_opt.go +++ b/bitmap_opt.go @@ -40,7 +40,7 @@ func andContainers(a, b, res *Bitmap, optBuf []uint16) { bc := b.getContainer(off) if c := containerAndAlt(ac, bc, optBuf, 0); len(c) > 0 && getCardinality(c) > 0 { // create a new container and update the key offset to this container. 
- offset := res.newContainer(uint16(len(c))) + offset := res.newContainerNoClr(uint16(len(c))) copy(res.data[offset:], c) res.setKey(ak, offset) } @@ -154,7 +154,7 @@ func andNotContainers(a, b, res *Bitmap, optBuf []uint16) { bc := b.getContainer(off) if c := containerAndNotAlt(ac, bc, optBuf, 0); len(c) > 0 && getCardinality(c) > 0 { // create a new container and update the key offset to this container. - offset := res.newContainer(uint16(len(c))) + offset := res.newContainerNoClr(uint16(len(c))) copy(res.data[offset:], c) res.setKey(ak, offset) } @@ -165,7 +165,7 @@ func andNotContainers(a, b, res *Bitmap, optBuf []uint16) { ac := a.getContainer(off) if getCardinality(ac) > 0 { // create a new container and update the key offset to this container. - offset := res.newContainer(uint16(len(ac))) + offset := res.newContainerNoClr(uint16(len(ac))) copy(res.data[offset:], ac) res.setKey(ak, offset) } @@ -175,14 +175,14 @@ func andNotContainers(a, b, res *Bitmap, optBuf []uint16) { } } for ; ai < an; ai++ { - off := a.keys.val(ai) - ac := a.getContainer(off) + offset := a.keys.val(ai) + ac := a.getContainer(offset) if getCardinality(ac) > 0 { ak := a.keys.key(ai) // create a new container and update the key offset to this container. 
- off = res.newContainer(uint16(len(ac))) - copy(res.data[off:], ac) - res.setKey(ak, off) + offset = res.newContainerNoClr(uint16(len(ac))) + copy(res.data[offset:], ac) + res.setKey(ak, offset) } } } @@ -270,6 +270,11 @@ func orContainers(a, b, res *Bitmap, buf []uint16) { ai, an := 0, a.keys.numKeys() bi, bn := 0, b.keys.numKeys() + akToAc := map[uint64][]uint16{} + bkToBc := map[uint64][]uint16{} + sizeContainers := uint64(0) + sizeKeys := uint64(0) + for ai < an && bi < bn { ak := a.keys.key(ai) bk := b.keys.key(bi) @@ -280,7 +285,12 @@ func orContainers(a, b, res *Bitmap, buf []uint16) { bc := b.getContainer(off) if c := containerOrAlt(ac, bc, buf, 0); len(c) > 0 && getCardinality(c) > 0 { // create a new container and update the key offset to this container. - offset := res.newContainer(uint16(len(c))) + + // Since buffer is used in containers merge, result container has to be copied + // to the bitmap immediately and to let buffer be reused for next merge. + // Therefore container can not be copied at the end of method execution like + // other containers from bitmaps a or b. + offset := res.newContainerNoClr(uint16(len(c))) copy(res.data[offset:], c) res.setKey(ak, offset) } @@ -290,20 +300,18 @@ func orContainers(a, b, res *Bitmap, buf []uint16) { off := a.keys.val(ai) ac := a.getContainer(off) if getCardinality(ac) > 0 { - // create a new container and update the key offset to this container. - offset := res.newContainer(uint16(len(ac))) - copy(res.data[offset:], ac) - res.setKey(ak, offset) + akToAc[ak] = ac + sizeContainers += uint64(len(ac)) + sizeKeys += 8 // 2x uint64 = 8x uint16; for key and offset } ai++ } else { off := b.keys.val(bi) bc := b.getContainer(off) if getCardinality(bc) > 0 { - // create a new container and update the key offset to this container. 
- offset := res.newContainer(uint16(len(bc))) - copy(res.data[offset:], bc) - res.setKey(bk, offset) + bkToBc[bk] = bc + sizeContainers += uint64(len(bc)) + sizeKeys += 8 // 2x uint64 = 8x uint16; for key and offset } bi++ } } @@ -313,10 +321,9 @@ func orContainers(a, b, res *Bitmap, buf []uint16) { ac := a.getContainer(off) if getCardinality(ac) > 0 { ak := a.keys.key(ai) - // create a new container and update the key offset to this container. - offset := res.newContainer(uint16(len(ac))) - copy(res.data[offset:], ac) - res.setKey(ak, offset) + akToAc[ak] = ac + sizeContainers += uint64(len(ac)) + sizeKeys += 8 // 2x uint64 = 8x uint16; for key and offset } } for ; bi < bn; bi++ { @@ -324,8 +331,27 @@ func orContainers(a, b, res *Bitmap, buf []uint16) { bc := b.getContainer(off) if getCardinality(bc) > 0 { bk := b.keys.key(bi) + bkToBc[bk] = bc + sizeContainers += uint64(len(bc)) + sizeKeys += 8 // 2x uint64 = 8x uint16; for key and offset + } + } + + if sizeContainers > 0 { + // ensure enough space for new containers and keys, + // allocate required memory just once to avoid copying underlying data slice multiple times + res.expandNoLengthChange(sizeContainers + sizeKeys) + res.expandKeys(sizeKeys) + + for ak, ac := range akToAc { + // create a new container and update the key offset to this container. + offset := res.newContainerNoClr(uint16(len(ac))) + copy(res.data[offset:], ac) + res.setKey(ak, offset) + } + for bk, bc := range bkToBc { // create a new container and update the key offset to this container.
- offset := res.newContainer(uint16(len(bc))) + offset := res.newContainerNoClr(uint16(len(bc))) copy(res.data[offset:], bc) res.setKey(bk, offset) } @@ -358,6 +384,12 @@ func orContainersInRange(a, b *Bitmap, bi, bn int, buf []uint16) { ai := a.keys.search(bk) an := a.keys.numKeys() + // copy containers from b to a all at once + // expanding underlying data slice and keys subslice once + bkToBc := map[uint64][]uint16{} + sizeContainers := uint64(0) + sizeKeys := uint64(0) + for ai < an && bi < bn { ak := a.keys.key(ai) bk := b.keys.key(bi) @@ -367,9 +399,26 @@ func orContainersInRange(a, b *Bitmap, bi, bn int, buf []uint16) { boff := b.keys.val(bi) bc := b.getContainer(boff) if c := containerOrAlt(ac, bc, buf, runInline); len(c) > 0 { - // make room for container, replacing smaller one and update key offset to new container. - a.insertAt(aoff, c) - a.setKey(ak, aoff) + // Previously the merged container was replacing the old one, + // first moving data to the right to free enough space for the + // merged container to fit. + // That solution turned out to be slower for large datasets than + // appending bitmap with entirely new container, as moving data + // is not needed in that case. + // Reference to prev container is then forgotten, resulting in + // memory not being used optimally. + + // Since buffer is used in containers merge, result container has to be copied + // to the bitmap immediately and to let buffer be reused for next merge. + // Therefore container can not be copied at the end of method execution like + // other containers from bitmap b. + offset := a.newContainerNoClr(uint16(len(c))) + copy(a.data[offset:], c) + a.setKey(ak, offset) + + // // make room for container, replacing smaller one and update key offset to new container.
+ // a.insertAt(aoff, c) + // a.setKey(ak, aoff) } ai++ bi++ } @@ -379,13 +428,9 @@ func orContainersInRange(a, b *Bitmap, bi, bn int, buf []uint16) { off := b.keys.val(bi) bc := b.getContainer(off) if getCardinality(bc) > 0 { - // create a new container and update the key offset to this container. - offset := a.newContainer(uint16(len(bc))) - copy(a.data[offset:], bc) - a.setKey(bk, offset) - // key was added to a bitmap. manually increase ai (current index) and an (length) - ai++ - an++ + bkToBc[bk] = bc + sizeContainers += uint64(len(bc)) + sizeKeys += 8 // 2x uint64 = 8x uint16; for key and offset } bi++ } @@ -395,8 +440,21 @@ func orContainersInRange(a, b *Bitmap, bi, bn int, buf []uint16) { bc := b.getContainer(off) if getCardinality(bc) > 0 { bk := b.keys.key(bi) + bkToBc[bk] = bc + sizeContainers += uint64(len(bc)) + sizeKeys += 8 // 2x uint64 = 8x uint16; for key and offset + } + } + + if sizeContainers > 0 { + // ensure enough space for new containers and keys, + // allocate required memory just once to avoid copying underlying data slice multiple times + a.expandNoLengthChange(sizeContainers + sizeKeys) + a.expandKeys(sizeKeys) + + for bk, bc := range bkToBc { // create a new container and update the key offset to this container. - offset := a.newContainer(uint16(len(bc))) + offset := a.newContainerNoClr(uint16(len(bc))) copy(a.data[offset:], bc) a.setKey(bk, offset) }