Skip to content

Commit

Permalink
apacheGH-38728: [Go] ipc: put lz4 decompression buffers back into syn…
Browse files Browse the repository at this point in the history
…c.Pool (apache#38729)

The lz4 decompressor was not calling Reset on the underlying reader in its Close method. This could prevent buffers from being released back to the pool, defeating the purpose of the sync.Pool in the lz4 package.

Additionally, a call to Close was missing in readDictionary.

Closes apache#38728 
* Closes: apache#38728

Authored-by: Alfonso Subiotto Marques <[email protected]>
Signed-off-by: Matt Topol <[email protected]>
  • Loading branch information
asubiotto authored and dgreiss committed Feb 17, 2024
1 parent aeb482a commit 9594bf4
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 1 deletion.
4 changes: 3 additions & 1 deletion go/arrow/ipc/compression.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,9 @@ type lz4Decompressor struct {
*lz4.Reader
}

// Close is a no-op: the body is empty, so the embedded lz4.Reader is left
// untouched.
func (z *lz4Decompressor) Close() {}
// Close resets the embedded lz4.Reader with a nil source.
//
// NOTE(review): the intent is to let the lz4 package return the reader's
// decompression buffers to its sync.Pool — confirm against lz4.Reader.Reset.
func (z *lz4Decompressor) Close() {
z.Reader.Reset(nil)
}

func getDecompressor(codec flatbuf.CompressionType) decompressor {
switch codec {
Expand Down
1 change: 1 addition & 0 deletions go/arrow/ipc/file_reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,7 @@ func readDictionary(memo *dictutils.Memo, meta *memory.Buffer, body ReadAtSeeker
bodyCompress := data.Compression(nil)
if bodyCompress != nil {
codec = getDecompressor(bodyCompress.Codec())
defer codec.Close()
}

id := md.Id()
Expand Down
90 changes: 90 additions & 0 deletions go/arrow/ipc/reader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ package ipc

import (
"bytes"
"fmt"
"io"
"testing"

"github.com/apache/arrow/go/v15/arrow"
Expand Down Expand Up @@ -93,3 +95,91 @@ func TestReaderCheckedAllocator(t *testing.T) {
_, err = reader.Read()
require.NoError(t, err)
}

// BenchmarkIPC benchmarks writing and reading an IPC stream that carries one
// small dictionary-encoded string record. Each codec setting (no compression,
// zstd, lz4) gets a "Writer" and a "Reader" sub-benchmark.
func BenchmarkIPC(b *testing.B) {
	mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
	// Deferred first so it runs last, after the builder and the record have
	// been released.
	defer mem.AssertSize(b, 0)

	dictType := &arrow.DictionaryType{
		ValueType: arrow.BinaryTypes.String,
		IndexType: arrow.PrimitiveTypes.Int32,
	}
	schema := arrow.NewSchema([]arrow.Field{{Name: "s", Type: dictType}}, nil)

	recBldr := array.NewRecordBuilder(mem, schema)
	defer recBldr.Release()

	dictBldr := recBldr.Field(0).(*array.BinaryDictionaryBuilder)
	for _, v := range []string{"foo", "bar", "baz"} {
		dictBldr.Append([]byte(v))
	}

	rec := recBldr.NewRecord()
	defer rec.Release()

	type codecCase struct {
		name        string
		codecOption Option
	}
	cases := []codecCase{
		{name: "plain"},
		{name: "zstd", codecOption: WithZstd()},
		{name: "lz4", codecOption: WithLZ4()},
	}

	for _, tc := range cases {
		// A nil codecOption means "no compression": only the base options are used.
		opts := []Option{WithSchema(schema), WithAllocator(mem)}
		if tc.codecOption != nil {
			opts = append(opts, tc.codecOption)
		}

		b.Run(fmt.Sprintf("Writer/codec=%s", tc.name), func(b *testing.B) {
			var out bytes.Buffer
			// One write pass lives in its own function so the deferred Close
			// fires at the end of every iteration, not at benchmark exit.
			writeOnce := func() {
				out.Reset()
				w := NewWriter(&out, opts...)
				defer w.Close()
				if err := w.Write(rec); err != nil {
					b.Fatal(err)
				}
			}
			for n := 0; n < b.N; n++ {
				writeOnce()
			}
		})

		b.Run(fmt.Sprintf("Reader/codec=%s", tc.name), func(b *testing.B) {
			// Encode the record once up front; only decoding is timed.
			var encoded bytes.Buffer
			w := NewWriter(&encoded, opts...)
			defer w.Close()
			require.NoError(b, w.Write(rec))
			payload := encoded.Bytes()

			// One read pass drains the stream until io.EOF and releases the
			// reader before returning.
			readAll := func() {
				rdr, err := NewReader(bytes.NewReader(payload), WithAllocator(mem))
				if err != nil {
					b.Fatal(err)
				}
				defer rdr.Release()
				for {
					_, err := rdr.Read()
					if err == io.EOF {
						return
					}
					if err != nil {
						b.Fatal(err)
					}
				}
			}

			b.ResetTimer()
			for n := 0; n < b.N; n++ {
				readAll()
			}
		})
	}
}

0 comments on commit 9594bf4

Please sign in to comment.