Skip to content

Commit

Permalink
optimize postings regex match all
Browse files Browse the repository at this point in the history
Signed-off-by: Ben Ye <[email protected]>
  • Loading branch information
yeya24 committed Aug 27, 2023
1 parent c5671c6 commit 07f902b
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 42 deletions.
2 changes: 1 addition & 1 deletion tsdb/block.go
Original file line number Diff line number Diff line change
Expand Up @@ -551,7 +551,7 @@ func (pb *Block) Delete(mint, maxt int64, ms ...*labels.Matcher) error {
return ErrClosing
}

p, err := PostingsForMatchers(pb.indexr, ms...)
p, err := PostingsForMatchers(pb.indexr, optimizeMatchAllRegex, ms...)
if err != nil {
return errors.Wrap(err, "select series")
}
Expand Down
4 changes: 4 additions & 0 deletions tsdb/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,9 @@ type Options struct {
// OutOfOrderCapMax is maximum capacity for OOO chunks (in samples).
// If it is <=0, the default value is assumed.
OutOfOrderCapMax int64

// OptimizeMatchAllRegex, if true, enables a fast path for the match-all regex matchers `.*` and `.+` when selecting postings.
OptimizeMatchAllRegex bool
}

// BlocksToDeleteFunc is a callback that receives a slice of blocks and returns
// a set of block ULIDs.
// NOTE(review): going by the name, the returned set is presumably the blocks
// that should be deleted — confirm against the callers.
type BlocksToDeleteFunc func(blocks []*Block) map[ulid.ULID]struct{}
Expand Down Expand Up @@ -834,6 +837,7 @@ func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs
}
}
}
optimizeMatchAllRegex = opts.OptimizeMatchAllRegex
db.oooWasEnabled.Store(opts.OutOfOrderTimeWindow > 0)
headOpts := DefaultHeadOptions()
headOpts.ChunkRange = rngs[0]
Expand Down
2 changes: 1 addition & 1 deletion tsdb/head.go
Original file line number Diff line number Diff line change
Expand Up @@ -1432,7 +1432,7 @@ func (h *Head) Delete(mint, maxt int64, ms ...*labels.Matcher) error {

ir := h.indexRange(mint, maxt)

p, err := PostingsForMatchers(ir, ms...)
p, err := PostingsForMatchers(ir, optimizeMatchAllRegex, ms...)
if err != nil {
return errors.Wrap(err, "select series")
}
Expand Down
39 changes: 26 additions & 13 deletions tsdb/querier.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ import (
// Bitmap used by func isRegexMetaCharacter to check whether a character needs to be escaped.
var regexMetaCharacterBytes [16]byte

// optimizeMatchAllRegex is the package-wide value passed as the optimizeMatchAll
// argument of PostingsForMatchers by the queriers, the Delete paths and the
// label name/value helpers. It is overwritten each time open() runs (from
// Options.OptimizeMatchAllRegex), so it is shared by every caller in the
// package rather than being scoped to a single DB.
// NOTE(review): written in open() and read on query/delete paths with no
// visible synchronization — confirm this is safe when more than one DB is opened.
var optimizeMatchAllRegex bool

// isRegexMetaCharacter reports whether byte b needs to be escaped.
func isRegexMetaCharacter(b byte) bool {
return b < utf8.RuneSelf && regexMetaCharacterBytes[b%16]&(1<<(b/16)) != 0
Expand Down Expand Up @@ -129,7 +131,7 @@ func (q *blockQuerier) Select(sortSeries bool, hints *storage.SelectHints, ms ..
maxt := q.maxt
disableTrimming := false

p, err := PostingsForMatchers(q.index, ms...)
p, err := PostingsForMatchers(q.index, optimizeMatchAllRegex, ms...)
if err != nil {
return storage.ErrSeriesSet(err)
}
Expand Down Expand Up @@ -173,7 +175,7 @@ func (q *blockChunkQuerier) Select(sortSeries bool, hints *storage.SelectHints,
maxt = hints.End
disableTrimming = hints.DisableTrimming
}
p, err := PostingsForMatchers(q.index, ms...)
p, err := PostingsForMatchers(q.index, optimizeMatchAllRegex, ms...)
if err != nil {
return storage.ErrChunkSeriesSet(err)
}
Expand Down Expand Up @@ -234,7 +236,7 @@ func findSetMatches(pattern string) []string {

// PostingsForMatchers assembles a single postings iterator against the index reader
// based on the given matchers. The resulting postings are not ordered by series.
func PostingsForMatchers(ix IndexReader, ms ...*labels.Matcher) (index.Postings, error) {
func PostingsForMatchers(ix IndexReader, optimizeMatchAll bool, ms ...*labels.Matcher) (index.Postings, error) {
var its, notIts []index.Postings
// See which label must be non-empty.
// Optimization for case like {l=~".", l!="1"}.
Expand Down Expand Up @@ -289,6 +291,13 @@ func PostingsForMatchers(ix IndexReader, ms ...*labels.Matcher) (index.Postings,
}
its = append(its, it)
default: // l="a"
if optimizeMatchAll && m.Type == labels.MatchRegexp && m.Value == ".+" {
vals, err := ix.LabelValues(m.Name)
if err != nil {
return nil, err
}
return ix.Postings(m.Name, vals...)
}
// Non-Not matcher, use normal postingsForMatcher.
it, err := postingsForMatcher(ix, m)
if err != nil {
Expand All @@ -300,15 +309,19 @@ func PostingsForMatchers(ix IndexReader, ms ...*labels.Matcher) (index.Postings,
its = append(its, it)
}
default: // l=""
// If the matchers for a labelname selects an empty value, it selects all
// the series which don't have the label name set too. See:
// https://github.com/prometheus/prometheus/issues/3575 and
// https://github.com/prometheus/prometheus/pull/3578#issuecomment-351653555
it, err := inversePostingsForMatcher(ix, m)
if err != nil {
return nil, err
if optimizeMatchAll && m.Type == labels.MatchRegexp && m.Value == ".*" {
notIts = append(notIts, index.EmptyPostings())
} else {
// If the matchers for a labelname selects an empty value, it selects all
// the series which don't have the label name set too. See:
// https://github.com/prometheus/prometheus/issues/3575 and
// https://github.com/prometheus/prometheus/pull/3578#issuecomment-351653555
it, err := inversePostingsForMatcher(ix, m)
if err != nil {
return nil, err
}
notIts = append(notIts, it)
}
notIts = append(notIts, it)
}
}

Expand Down Expand Up @@ -405,7 +418,7 @@ func inversePostingsForMatcher(ix IndexReader, m *labels.Matcher) (index.Posting
}

func labelValuesWithMatchers(r IndexReader, name string, matchers ...*labels.Matcher) ([]string, error) {
p, err := PostingsForMatchers(r, matchers...)
p, err := PostingsForMatchers(r, optimizeMatchAllRegex, matchers...)
if err != nil {
return nil, errors.Wrap(err, "fetching postings for matchers")
}
Expand Down Expand Up @@ -455,7 +468,7 @@ func labelValuesWithMatchers(r IndexReader, name string, matchers ...*labels.Mat
}

func labelNamesWithMatchers(r IndexReader, matchers ...*labels.Matcher) ([]string, error) {
p, err := PostingsForMatchers(r, matchers...)
p, err := PostingsForMatchers(r, optimizeMatchAllRegex, matchers...)
if err != nil {
return nil, err
}
Expand Down
18 changes: 12 additions & 6 deletions tsdb/querier_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,11 @@ func BenchmarkQuerier(b *testing.B) {
ir, err := h.Index()
require.NoError(b, err)
b.Run("Head", func(b *testing.B) {
b.Run("PostingsForMatchers", func(b *testing.B) {
benchmarkPostingsForMatchers(b, ir)
b.Run("PostingsForMatchers non optimized", func(b *testing.B) {
benchmarkPostingsForMatchers(b, ir, false)
})
b.Run("PostingsForMatchers optimized", func(b *testing.B) {
benchmarkPostingsForMatchers(b, ir, true)
})
b.Run("labelValuesWithMatchers", func(b *testing.B) {
benchmarkLabelValuesWithMatchers(b, ir)
Expand All @@ -81,16 +84,19 @@ func BenchmarkQuerier(b *testing.B) {
require.NoError(b, err)
defer ir.Close()
b.Run("Block", func(b *testing.B) {
b.Run("PostingsForMatchers", func(b *testing.B) {
benchmarkPostingsForMatchers(b, ir)
b.Run("PostingsForMatchers non optimized", func(b *testing.B) {
benchmarkPostingsForMatchers(b, ir, false)
})
b.Run("PostingsForMatchers optimized", func(b *testing.B) {
benchmarkPostingsForMatchers(b, ir, true)
})
b.Run("labelValuesWithMatchers", func(b *testing.B) {
benchmarkLabelValuesWithMatchers(b, ir)
})
})
}

func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) {
func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader, optimize bool) {
n1 := labels.MustNewMatcher(labels.MatchEqual, "n", "1"+postingsBenchSuffix)
nX := labels.MustNewMatcher(labels.MatchEqual, "n", "X"+postingsBenchSuffix)

Expand Down Expand Up @@ -166,7 +172,7 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) {
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := PostingsForMatchers(ir, c.matchers...)
_, err := PostingsForMatchers(ir, optimize, c.matchers...)
require.NoError(b, err)
}
})
Expand Down
44 changes: 23 additions & 21 deletions tsdb/querier_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2168,29 +2168,31 @@ func TestPostingsForMatchers(t *testing.T) {
}
name += matcher.String()
}
t.Run(name, func(t *testing.T) {
exp := map[string]struct{}{}
for _, l := range c.exp {
exp[l.String()] = struct{}{}
}
p, err := PostingsForMatchers(ir, c.matchers...)
require.NoError(t, err)
for _, optimizeMatchAll := range []bool{false, true} {
t.Run(fmt.Sprintf("%s, optimize=%s", name, strconv.FormatBool(optimizeMatchAllRegex)), func(t *testing.T) {
exp := map[string]struct{}{}
for _, l := range c.exp {
exp[l.String()] = struct{}{}
}
p, err := PostingsForMatchers(ir, optimizeMatchAll, c.matchers...)
require.NoError(t, err)

var builder labels.ScratchBuilder
for p.Next() {
require.NoError(t, ir.Series(p.At(), &builder, &[]chunks.Meta{}))
lbls := builder.Labels()
if _, ok := exp[lbls.String()]; !ok {
t.Errorf("Evaluating %v, unexpected result %s", c.matchers, lbls.String())
} else {
delete(exp, lbls.String())
var builder labels.ScratchBuilder
for p.Next() {
require.NoError(t, ir.Series(p.At(), &builder, &[]chunks.Meta{}))
lbls := builder.Labels()
if _, ok := exp[lbls.String()]; !ok {
t.Errorf("Evaluating %v, unexpected result %s", c.matchers, lbls.String())
} else {
delete(exp, lbls.String())
}
}
}
require.NoError(t, p.Err())
if len(exp) != 0 {
t.Errorf("Evaluating %v, missing results %+v", c.matchers, exp)
}
})
require.NoError(t, p.Err())
if len(exp) != 0 {
t.Errorf("Evaluating %v, missing results %+v", c.matchers, exp)
}
})
}
}
}

Expand Down

0 comments on commit 07f902b

Please sign in to comment.