chore(dataobj): Reintroduce sorting of the logs section (#15906)
rfratto authored Jan 23, 2025
1 parent 5e4df21 commit 948f5c5
Showing 22 changed files with 1,158 additions and 235 deletions.
27 changes: 26 additions & 1 deletion pkg/dataobj/dataobj.go
@@ -47,16 +47,29 @@ type BuilderConfig struct {

// TargetObjectSize configures a target size for data objects.
TargetObjectSize flagext.Bytes `yaml:"target_object_size"`

// TargetSectionSize configures the maximum size of data in a section. Sections
// which support this parameter will place overflow data into new sections of
// the same type.
TargetSectionSize flagext.Bytes `yaml:"target_section_size"`

// BufferSize configures the size of the buffer used to accumulate
// uncompressed logs in memory prior to sorting.
BufferSize flagext.Bytes `yaml:"buffer_size"`
}

// RegisterFlagsWithPrefix registers flags with the given prefix.
func (cfg *BuilderConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
_ = cfg.TargetPageSize.Set("2MB")
_ = cfg.TargetObjectSize.Set("1GB")
_ = cfg.BufferSize.Set("16MB") // Page Size * 8
_ = cfg.TargetSectionSize.Set("128MB") // Target Object Size / 8

f.IntVar(&cfg.SHAPrefixSize, prefix+"sha-prefix-size", 2, "The size of the SHA prefix to use for the data object builder.")
f.Var(&cfg.TargetPageSize, prefix+"target-page-size", "The size of the target page to use for the data object builder.")
f.Var(&cfg.TargetObjectSize, prefix+"target-object-size", "The size of the target object to use for the data object builder.")
f.Var(&cfg.TargetSectionSize, prefix+"target-section-size", "Configures a maximum size for sections, for sections that support it.")
f.Var(&cfg.BufferSize, prefix+"buffer-size", "The size of the buffer to use for sorting logs.")
}

// Validate validates the BuilderConfig.
@@ -77,6 +90,14 @@ func (cfg *BuilderConfig) Validate() error {
errs = append(errs, errors.New("TargetObjectSize must be greater than 0"))
}

if cfg.BufferSize <= 0 {
errs = append(errs, errors.New("BufferSize must be greater than 0"))
}

if cfg.TargetSectionSize <= 0 || cfg.TargetSectionSize > cfg.TargetObjectSize {
errs = append(errs, errors.New("SectionSize must be greater than 0 and less than or equal to TargetObjectSize"))
}

return errors.Join(errs...)
}
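
For reference, a minimal sketch (not part of this commit) of constructing a BuilderConfig programmatically with the defaults registered above and then validating it; the import path is assumed from this repository's module layout:

package main

import (
	"log"

	"github.com/grafana/loki/v3/pkg/dataobj"
)

func main() {
	var cfg dataobj.BuilderConfig
	cfg.SHAPrefixSize = 2

	// Mirror the defaults set in RegisterFlagsWithPrefix.
	_ = cfg.TargetPageSize.Set("2MB")
	_ = cfg.TargetObjectSize.Set("1GB")
	_ = cfg.BufferSize.Set("16MB")         // page size * 8
	_ = cfg.TargetSectionSize.Set("128MB") // target object size / 8

	// Validate now also enforces BufferSize > 0 and
	// 0 < TargetSectionSize <= TargetObjectSize.
	if err := cfg.Validate(); err != nil {
		log.Fatal(err)
	}
}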

@@ -148,7 +169,11 @@ func NewBuilder(cfg BuilderConfig, bucket objstore.Bucket, tenantID string) (*Bu
labelCache: labelCache,

streams: streams.New(metrics.streams, int(cfg.TargetPageSize)),
logs: logs.New(metrics.logs, int(cfg.TargetPageSize)),
logs: logs.New(metrics.logs, logs.Options{
PageSizeHint: int(cfg.TargetPageSize),
BufferSize: int(cfg.BufferSize),
SectionSize: int(cfg.TargetSectionSize),
}),

flushBuffer: flushBuffer,
encoder: encoder,
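The logs package now takes an Options struct instead of a bare page size. A hypothetical sketch of its shape, inferred purely from the call site above and the BuilderConfig comments (the real definition lives in the internal logs package and may differ):

package logs

// Options is a guess at the structure implied by logs.New above; field names
// come from the call site, comments are interpretation only.
type Options struct {
	// PageSizeHint is the target size of encoded pages.
	PageSizeHint int

	// BufferSize is how many bytes of uncompressed log data to accumulate
	// in memory before sorting.
	BufferSize int

	// SectionSize is the maximum size of a single logs section; overflow
	// is written to a new section of the same type.
	SectionSize int
}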
37 changes: 13 additions & 24 deletions pkg/dataobj/dataobj_test.go
@@ -19,6 +19,16 @@ import (
"github.com/grafana/loki/v3/pkg/logql/syntax"
)

var testBuilderConfig = BuilderConfig{
SHAPrefixSize: 2,

TargetPageSize: 2048,
TargetObjectSize: 4096,
TargetSectionSize: 4096,

BufferSize: 2048 * 8,
}

func Test(t *testing.T) {
bucket := objstore.NewInMemBucket()

@@ -67,16 +77,7 @@ func Test(t *testing.T) {
}

t.Run("Build", func(t *testing.T) {
// Create a tiny builder which flushes a lot of objects and pages to properly
// test the builder.
builderConfig := BuilderConfig{
SHAPrefixSize: 2,

TargetPageSize: 1_500_000,
TargetObjectSize: 10_000_000,
}

builder, err := NewBuilder(builderConfig, bucket, "fake")
builder, err := NewBuilder(testBuilderConfig, bucket, "fake")
require.NoError(t, err)

for _, entry := range streams {
@@ -94,10 +95,7 @@ func Test(t *testing.T) {

actual, err := result.Collect(reader.Streams(context.Background(), objects[0]))
require.NoError(t, err)

// TODO(rfratto): reenable once sorting is reintroduced.
_ = actual
// require.Equal(t, sortStreams(t, streams), actual)
require.Equal(t, sortStreams(t, streams), actual)
})
}

@@ -109,16 +107,7 @@ func Test_Builder_Append(t *testing.T) {

bucket := objstore.NewInMemBucket()

// Create a tiny builder which flushes a lot of objects and pages to properly
// test the builder.
builderConfig := BuilderConfig{
SHAPrefixSize: 2,

TargetPageSize: 2048,
TargetObjectSize: 4096,
}

builder, err := NewBuilder(builderConfig, bucket, "fake")
builder, err := NewBuilder(testBuilderConfig, bucket, "fake")
require.NoError(t, err)

for {
12 changes: 12 additions & 0 deletions pkg/dataobj/internal/dataset/column_builder.go
@@ -3,6 +3,8 @@ package dataset
import (
"fmt"

"github.com/klauspost/compress/zstd"

"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"
)

@@ -21,6 +23,16 @@ type BuilderOptions struct {

// Compression is the compression algorithm to use for values.
Compression datasetmd.CompressionType

// CompressionOptions holds optional configuration for compression.
CompressionOptions CompressionOptions
}

// CompressionOptions customizes the compressor used when building pages.
type CompressionOptions struct {
// Zstd holds encoding options for Zstd compression. Only used for
// [datasetmd.COMPRESSION_TYPE_ZSTD].
Zstd []zstd.EOption
}

// A ColumnBuilder builds a sequence of [Value] entries of a common type into a
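The new CompressionOptions hook lets callers pass zstd encoder options through to the page writer; previously the level was hard-coded to zstd.SpeedBestCompression (see the page_compress_writer.go diff below). A sketch, assuming a caller inside the Loki module (the internal packages are not importable from outside it), of how the old behaviour can be preserved explicitly:

package example

import (
	"github.com/klauspost/compress/zstd"

	"github.com/grafana/loki/v3/pkg/dataobj/internal/dataset"
	"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"
)

// zstdBuilderOptions reproduces the previously hard-coded encoder level via
// the new CompressionOptions field. Other BuilderOptions fields are omitted.
func zstdBuilderOptions() dataset.BuilderOptions {
	return dataset.BuilderOptions{
		Compression: datasetmd.COMPRESSION_TYPE_ZSTD,
		CompressionOptions: dataset.CompressionOptions{
			Zstd: []zstd.EOption{
				zstd.WithEncoderLevel(zstd.SpeedBestCompression),
			},
		},
	}
}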
5 changes: 3 additions & 2 deletions pkg/dataobj/internal/dataset/dataset_iter.go
@@ -33,7 +33,6 @@ func Iter(ctx context.Context, columns []Column) result.Seq[Row] {

type pullColumnIter struct {
Next func() (result.Result[Value], bool)
Stop func()
}

return result.Iter(func(yield func(Row) bool) error {
@@ -47,7 +46,9 @@ func Iter(ctx context.Context, columns []Column) result.Seq[Row] {
}

next, stop := result.Pull(lazyColumnIter(ctx, col.ColumnInfo(), pages))
pullColumns = append(pullColumns, pullColumnIter{Next: next, Stop: stop})
defer stop()

pullColumns = append(pullColumns, pullColumnIter{Next: next})
}

// Start emitting rows; each row is composed of the next value from all of
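The dataset_iter.go change drops the stored per-column Stop funcs and instead defers stop() immediately after each Pull, so every column iterator is released when the surrounding closure returns, even if a later column fails to open. result.Pull is Loki's internal helper; the standard library's iter.Pull (Go 1.23+) has the same contract, sketched here as an analogy rather than the actual Loki code:

package main

import (
	"fmt"
	"iter"
)

// sumFirstN pulls at most n values from seq. Deferring stop right after Pull
// guarantees the iterator's resources are released no matter how we exit.
func sumFirstN(seq iter.Seq[int], n int) int {
	next, stop := iter.Pull(seq)
	defer stop()

	total := 0
	for i := 0; i < n; i++ {
		v, ok := next()
		if !ok {
			break
		}
		total += v
	}
	return total
}

func main() {
	naturals := func(yield func(int) bool) {
		for i := 1; ; i++ {
			if !yield(i) {
				return
			}
		}
	}
	fmt.Println(sumFirstN(naturals, 5)) // prints 15
}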
98 changes: 80 additions & 18 deletions pkg/dataobj/internal/dataset/page.go
@@ -7,11 +7,13 @@ import (
"fmt"
"hash/crc32"
"io"
"sync"

"github.com/golang/snappy"
"github.com/klauspost/compress/zstd"

"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"
"github.com/grafana/loki/v3/pkg/dataobj/internal/util/bufpool"
)

// Helper types.
@@ -88,39 +90,99 @@ func (p *MemPage) reader(compression datasetmd.CompressionType) (presence io.Rea
}

var (
bitmapReader = bytes.NewReader(p.Data[n : n+int(bitmapSize)])
compressedDataReader = bytes.NewReader(p.Data[n+int(bitmapSize):])
bitmapData = p.Data[n : n+int(bitmapSize)]
compressedValuesData = p.Data[n+int(bitmapSize):]

bitmapReader = bytes.NewReader(bitmapData)
compressedValuesReader = bytes.NewReader(compressedValuesData)
)

switch compression {
case datasetmd.COMPRESSION_TYPE_UNSPECIFIED, datasetmd.COMPRESSION_TYPE_NONE:
return bitmapReader, io.NopCloser(compressedDataReader), nil
return bitmapReader, io.NopCloser(compressedValuesReader), nil

case datasetmd.COMPRESSION_TYPE_SNAPPY:
sr := snappy.NewReader(compressedDataReader)
return bitmapReader, io.NopCloser(sr), nil
sr := snappyPool.Get().(*snappy.Reader)
sr.Reset(compressedValuesReader)
return bitmapReader, &closerFunc{Reader: sr, onClose: func() error {
snappyPool.Put(sr)
return nil
}}, nil

case datasetmd.COMPRESSION_TYPE_ZSTD:
zr, err := zstd.NewReader(compressedDataReader)
if err != nil {
return nil, nil, fmt.Errorf("opening zstd reader: %w", err)
}
return bitmapReader, newZstdReader(zr), nil
zr := &fixedZstdReader{page: p, data: compressedValuesData}
return bitmapReader, zr, nil
}

panic(fmt.Sprintf("dataset.MemPage.reader: unknown compression type %q", compression.String()))
}

// zstdReader implements [io.ReadCloser] for a [zstd.Decoder].
type zstdReader struct{ *zstd.Decoder }
var snappyPool = sync.Pool{
New: func() interface{} {
return snappy.NewReader(nil)
},
}

type closerFunc struct {
io.Reader
onClose func() error
}

func (c *closerFunc) Close() error { return c.onClose() }

// newZstdReader returns a new [io.ReadCloser] for a [zstd.Decoder].
func newZstdReader(dec *zstd.Decoder) io.ReadCloser {
return &zstdReader{Decoder: dec}
// globalZstdDecoder is a shared zstd decoder for [fixedZstdReader]. Concurrent
// uses of globalZstdDecoder are only safe when using [zstd.Decoder.DecodeAll].
var globalZstdDecoder = func() *zstd.Decoder {
d, err := zstd.NewReader(nil, zstd.WithDecoderConcurrency(1))
if err != nil {
panic(err)
}
return d
}()

// fixedZstdReader is an [io.ReadCloser] that decompresses a zstd buffer in a
// single pass.
type fixedZstdReader struct {
page *MemPage
data []byte

uncompressedBuf *bytes.Buffer
closed bool
}

// Close implements [io.Closer].
func (r *zstdReader) Close() error {
r.Decoder.Close()
func (r *fixedZstdReader) Read(p []byte) (int, error) {
if r.closed {
return 0, io.ErrClosedPipe
}

if r.uncompressedBuf != nil {
return r.uncompressedBuf.Read(p)
}

// We decompress the entire buffer in a single pass. While a pooled zstd
// reader would require less memory and would allow us to stream values as we
// decompress, pooling zstd decoders is difficult to do properly, as it
// requires a finalizer to release resources, and the goroutines spawned by
// decoders prevent the finalizer from ever being called.
//
// To make efficient zstd decoding less error prone, we opt for this instead.
r.uncompressedBuf = bufpool.Get(r.page.Info.UncompressedSize)
r.uncompressedBuf.Reset()

buf, err := globalZstdDecoder.DecodeAll(r.data, r.uncompressedBuf.AvailableBuffer())
if err != nil {
return 0, fmt.Errorf("decoding zstd: %w", err)
}
_, _ = r.uncompressedBuf.Write(buf)

return r.uncompressedBuf.Read(p)
}

func (r *fixedZstdReader) Close() error {
if r.uncompressedBuf != nil {
bufpool.Put(r.uncompressedBuf)
r.uncompressedBuf = nil
}
r.closed = true
return nil
}
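
Taken together, the page.go changes (and the Flush change in page_builder.go below) rely on two properties of klauspost/compress/zstd: closing the encoder terminates the frame with an EOF marker, and a decoder created with a nil reader is safe to share as long as only DecodeAll is used. A small round-trip sketch of that contract, not taken from the commit:

package main

import (
	"bytes"
	"fmt"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// Compress: Close (not just Flush) writes the final frame marker.
	var buf bytes.Buffer
	enc, err := zstd.NewWriter(&buf)
	if err != nil {
		panic(err)
	}
	if _, err := enc.Write([]byte("hello logs section")); err != nil {
		panic(err)
	}
	if err := enc.Close(); err != nil {
		panic(err)
	}

	// Decompress the whole buffer in one pass with a shared decoder, the same
	// way fixedZstdReader uses globalZstdDecoder.
	dec, err := zstd.NewReader(nil, zstd.WithDecoderConcurrency(1))
	if err != nil {
		panic(err)
	}
	out, err := dec.DecodeAll(buf.Bytes(), nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out)) // hello logs section
}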
12 changes: 9 additions & 3 deletions pkg/dataobj/internal/dataset/page_builder.go
@@ -56,7 +56,7 @@ func newPageBuilder(opts BuilderOptions) (*pageBuilder, error) {
presenceBuffer = bytes.NewBuffer(nil)
valuesBuffer = bytes.NewBuffer(make([]byte, 0, opts.PageSizeHint))

valuesWriter = newCompressWriter(valuesBuffer, opts.Compression)
valuesWriter = newCompressWriter(valuesBuffer, opts.Compression, opts.CompressionOptions)
)

presenceEnc := newBitmapEncoder(presenceBuffer)
@@ -174,12 +174,18 @@ func (b *pageBuilder) Flush() (*MemPage, error) {
return nil, fmt.Errorf("no data to flush")
}

// Before we can build the page we need to finish flushing our encoders and writers.
// Before we can build the page we need to finish flushing our encoders and
// writers.
//
// We must call [compressWriter.Close] to ensure that Zstd writers write a
// proper EOF marker, otherwise synchronous decoding can't be used.
// compressWriters can continue to be reset and reused after closing, so this
// is safe.
if err := b.presenceEnc.Flush(); err != nil {
return nil, fmt.Errorf("flushing presence encoder: %w", err)
} else if err := b.valuesEnc.Flush(); err != nil {
return nil, fmt.Errorf("flushing values encoder: %w", err)
} else if err := b.valuesWriter.Flush(); err != nil {
} else if err := b.valuesWriter.Close(); err != nil {
return nil, fmt.Errorf("flushing values writer: %w", err)
}

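The Flush comment above notes that compressWriters can be reset and reused after closing. That matches the behaviour assumed of the underlying zstd encoder, where Reset re-arms a closed encoder against a new destination; a sketch of that assumed pattern, not code from this commit:

package main

import (
	"bytes"
	"fmt"

	"github.com/klauspost/compress/zstd"
)

func main() {
	var first, second bytes.Buffer

	enc, err := zstd.NewWriter(&first)
	if err != nil {
		panic(err)
	}

	// First page: write, then Close to terminate the zstd frame.
	_, _ = enc.Write([]byte("page 0"))
	_ = enc.Close()

	// Reset re-arms the closed encoder for the next page's destination.
	enc.Reset(&second)
	_, _ = enc.Write([]byte("page 1"))
	_ = enc.Close()

	fmt.Println(first.Len(), second.Len()) // two independent zstd frames
}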
10 changes: 6 additions & 4 deletions pkg/dataobj/internal/dataset/page_compress_writer.go
@@ -20,14 +20,16 @@ type compressWriter struct {
w io.WriteCloser // Compressing writer.
buf *bufio.Writer // Buffered writer in front of w to be able to call WriteByte.

rawBytes int // Number of uncompressed bytes written.

compression datasetmd.CompressionType // Compression type being used.
rawBytes int // Number of uncompressed bytes written.
opts CompressionOptions // Options to customize compression.
}

var _ streamio.Writer = (*compressWriter)(nil)

func newCompressWriter(w io.Writer, ty datasetmd.CompressionType) *compressWriter {
c := compressWriter{compression: ty}
func newCompressWriter(w io.Writer, ty datasetmd.CompressionType, opts CompressionOptions) *compressWriter {
c := compressWriter{compression: ty, opts: opts}
c.Reset(w)
return &c
}
@@ -85,7 +87,7 @@ func (c *compressWriter) Reset(w io.Writer) {
compressedWriter = snappy.NewBufferedWriter(w)

case datasetmd.COMPRESSION_TYPE_ZSTD:
zw, err := zstd.NewWriter(w, zstd.WithEncoderLevel(zstd.SpeedBestCompression))
zw, err := zstd.NewWriter(w, c.opts.Zstd...)
if err != nil {
panic(fmt.Sprintf("compressWriter.Reset: creating zstd writer: %v", err))
}
[Diffs for the remaining changed files are not shown in this view.]
