-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsigma.go
598 lines (540 loc) · 16.7 KB
/
sigma.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
// Copyright 2024 RunReveal Inc.
// SPDX-License-Identifier: Apache-2.0
// Package sigmalite provides a parser and an execution engine
// for the [Sigma detection format].
//
// [Sigma detection format]: https://sigmahq.io/
package sigmalite
import (
"encoding/base64"
"fmt"
"net/netip"
"regexp"
"slices"
"strings"
"sync"
)
// Rule represents a parsed Sigma rule file.
//
// The named fields hold the rule attributes this package interprets;
// every other top-level field of the rule file is kept, undecoded, in Extra.
type Rule struct {
// Title is a short description of what the rule detects.
Title string
// ID is an optional globally unique identifier for the rule.
ID string
// Related is a set of references to other rules.
Related []Relation
// Status is an optional indicator of the stability of the rule.
Status Status
// Description is a long-form description of what the rule detects.
Description string
// References is a set of references that the rule was derived from.
// By convention, this is a set of URLs.
References []string
// Author is the creator of the rule.
Author string
// Date is the creation date of the rule.
Date Date
// Modified is the last modification date of the rule.
// By convention, Modified is updated whenever
// the Detection, Level, LogSource, or Title is changed,
// or whenever Status changes to [Deprecated].
Modified Date
// Tags is a set of categories applied to the rule.
// See https://github.com/SigmaHQ/sigma-specification/blob/main/Tags_specification.md
// for more details.
Tags []string
// Level indicates the criticality of the rule.
Level Level
// LogSource describes the log data that the detection is meant to be applied to.
LogSource *LogSource
// Detection describes the pattern that a rule is matching on.
Detection *Detection
// Fields is a list of log fields that could be interesting in further analysis of the event
// and should be displayed to the analyst.
Fields []string
// FalsePositives is a list of known false positives that may occur.
FalsePositives []string
// Extra holds the YAML nodes of the unprocessed top-level fields.
// Each value can be turned into a Go value through [Decoder.Decode].
// NOTE(review): the map key is presumably the YAML field name — confirm against the parser.
Extra map[string]Decoder
}
// Implementation note:
// The Decoder interface exists so that *yaml.Node is not part of the public interface of this package.
// Applications can type-assert if they really want,
// but that will naturally break or permit multiple types as desired.
// A Decoder is a value that can be decoded into a Go value.
// It is the value type of the Extra map in [Rule].
type Decoder interface {
// Decode decodes the receiver into v and reports any failure as an error.
// NOTE(review): v is presumably a pointer to the destination,
// as with yaml.Node.Decode — confirm against the concrete implementation.
Decode(v any) error
}
// LogSource describes the log data that a [Detection] is meant to be applied to.
//
// NOTE(review): the four fields presumably mirror the identically named
// attributes of the "logsource" section of a Sigma rule — confirm against
// the parser and the Sigma specification. Nothing in this file reads them.
type LogSource struct {
Category string
Product string
Service string
Definition string
}
// LogEntry represents an entry that a [Rule] can match on.
type LogEntry struct {
// Message is the text that a [SearchAtom] with an empty Field is matched against.
Message string
// Fields maps field names to field values.
// A [SearchAtom] with a non-empty Field looks its value up here,
// trying the exact name first and then a case-insensitive comparison.
Fields map[string]string
}
// MatchOptions are the parameters to [Detection.Matches] and [Expr].ExprMatches.
// [SearchAtom.ExprMatches] accepts a nil *MatchOptions
// and treats it as having no placeholders.
type MatchOptions struct {
// Placeholders maps a placeholder name
// (the text between the '%' signs of a pattern such as "%name%")
// to the patterns that replace it
// when a [SearchAtom] has "expand" as its first modifier.
Placeholders map[string][]string
}
// Detection is the pattern that a [Rule] matches on.
type Detection struct {
	// Expr is the root expression evaluated by [Detection.Matches].
	Expr Expr
}

// Matches reports whether entry satisfies the detection's expression.
// It hands entry and opts, unchanged, to the ExprMatches method of d.Expr.
func (d *Detection) Matches(entry *LogEntry, opts *MatchOptions) bool {
	root := d.Expr
	matched := root.ExprMatches(entry, opts)
	return matched
}
// An Expr is a sub-expression inside of a [Detection].
//
// ExprMatches reports whether an entry matches the expression.
// Implementations of ExprMatches must be safe to call concurrently
// from multiple goroutines.
//
// The implementations declared in this file are
// [NamedExpr], [NotExpr], [AndExpr], [OrExpr], and [SearchAtom].
type Expr interface {
// ExprMatches reports whether the entry matches the expression
// under the given options.
// The composite expressions in this file pass both arguments
// through to their sub-expressions unchanged.
ExprMatches(*LogEntry, *MatchOptions) bool
}
// NamedExpr is an [Expr] that carries a name.
// The specification calls these "search identifiers".
type NamedExpr struct {
	// Name is the identifier of the expression; it plays no part in matching.
	Name string
	// X is the expression being named.
	X Expr
}

// ExprMatches reports whether entry matches the wrapped expression n.X.
func (n *NamedExpr) ExprMatches(entry *LogEntry, opts *MatchOptions) bool {
	inner := n.X
	return inner.ExprMatches(entry, opts)
}
// NotExpr is an [Expr] that matches exactly when its operand does not.
type NotExpr struct {
	// X is the expression being negated.
	X Expr
}

// ExprMatches reports whether entry fails to match x.X.
func (x *NotExpr) ExprMatches(entry *LogEntry, opts *MatchOptions) bool {
	if x.X.ExprMatches(entry, opts) {
		return false
	}
	return true
}
// AndExpr is an [Expr] that is true
// exactly when every one of its sub-expressions is true.
// An AndExpr without sub-expressions is true.
type AndExpr struct {
	// X lists the sub-expressions, evaluated in order.
	X []Expr
}

// ExprMatches reports whether entry matches all sub-expressions.
// Evaluation stops at the first sub-expression that does not match.
func (a *AndExpr) ExprMatches(entry *LogEntry, opts *MatchOptions) bool {
	operands := a.X
	for i := 0; i < len(operands); i++ {
		if matched := operands[i].ExprMatches(entry, opts); !matched {
			return false
		}
	}
	return true
}
// OrExpr is an [Expr] that is true
// when at least one of its sub-expressions is true.
// An OrExpr without sub-expressions is false.
type OrExpr struct {
	// X lists the sub-expressions, evaluated in order.
	X []Expr
}

// ExprMatches reports whether entry matches any sub-expression.
// Evaluation stops at the first sub-expression that matches.
func (o *OrExpr) ExprMatches(entry *LogEntry, opts *MatchOptions) bool {
	operands := o.X
	for i := 0; i < len(operands); i++ {
		if matched := operands[i].ExprMatches(entry, opts); matched {
			return true
		}
	}
	return false
}
// A SearchAtom is an [Expr] that matches against a single field.
//
// The exported fields describe the search.
// The unexported fields cache the matcher built from them
// so that repeated calls to ExprMatches do not rebuild it every time.
// Because the struct contains a sync.RWMutex,
// a SearchAtom should be shared by pointer rather than copied.
type SearchAtom struct {
// Field is the name of the field to match against.
// If empty, then this matches against the message.
Field string
// Modifiers is a sequence of zero or more modifiers to apply against the field
// before checking Patterns.
// Validate accepts "re", "cidr", "contains", "all", "startswith", "endswith",
// "windash", "base64", "base64offset", and "expand".
Modifiers []string
// Patterns is the set of patterns to check against the field.
// If one of them matches, then the field matches this atom.
// With the "all" modifier, every pattern has to match instead.
Patterns []string
// mu guards the compiled* fields below.
mu sync.RWMutex
// compiledIsMessage, compiledModifiers, and compiledPatterns record
// the inputs that the cached matcher in compiled was built from.
// They are written together by compile while holding mu.
compiledIsMessage bool
compiledModifiers []string
compiledPatterns []string
// compiled is the cached matcher produced by the most recent cache update.
compiled compiledSearchAtom
}
// compiledSearchAtom is the matcher that SearchAtom.compile builds
// from an atom's modifiers and patterns.
// A value with no regular expressions and no prefixes matches nothing.
type compiledSearchAtom struct {
// regexp holds regular expressions that must all match the field value.
regexp []*regexp.Regexp
// cidr holds IP prefixes; when non-empty, the field value must parse
// as an IP address that at least one of them contains.
cidr []netip.Prefix
}
// Validate returns an error if the search atom won't match
// because the modifiers or patterns are invalid.
//
// The following rules are enforced:
//   - There must be at least one pattern.
//   - "re" and "cidr" must be the only modifier,
//     apart from an optional "expand" directly before them.
//   - "expand" must be the first modifier,
//     and every pattern must then be a placeholder of the form "%name%".
//   - "base64offset" patterns must be at least 3 characters long.
//   - Modifiers other than the ones above and "contains", "all",
//     "startswith", "endswith", "windash", and "base64" are rejected.
//
// Without "expand", "re" patterns must compile as regular expressions
// and "cidr" patterns must parse as IP prefixes.
// With "expand" the real patterns are only known at match time,
// so they are not checked here.
func (atom *SearchAtom) Validate() error {
	if len(atom.Patterns) == 0 {
		return fmt.Errorf("no patterns")
	}

	const (
		globPattern = iota
		rePattern
		cidr
	)
	patternType := globPattern
	expand := false

	// standsAlone reports whether the modifier at index i is the only modifier,
	// not counting an "expand" immediately in front of it.
	standsAlone := func(i int) bool {
		n := len(atom.Modifiers)
		return n == 1 || (n == 2 && i == 1 && atom.Modifiers[0] == "expand")
	}

	for i, mod := range atom.Modifiers {
		switch mod {
		case "re":
			patternType = rePattern
			if !standsAlone(i) {
				return fmt.Errorf("re must be only modifier")
			}
		case "cidr":
			patternType = cidr
			if !standsAlone(i) {
				return fmt.Errorf("cidr must be only modifier")
			}
		case "contains", "all", "startswith", "endswith", "windash", "base64":
			// No special handling required.
		case "base64offset":
			for _, pat := range atom.Patterns {
				if len(pat) < 3 {
					return fmt.Errorf("base64offset patterns must be at least 3 characters long")
				}
			}
		case "expand":
			expand = true
			if i != 0 {
				return fmt.Errorf("expand can only be the first modifier")
			}
			for _, placeholder := range atom.Patterns {
				if _, ok := cutPlaceholder(placeholder); !ok {
					return fmt.Errorf("placeholder %q must start and end with '%%'", placeholder)
				}
			}
		default:
			return fmt.Errorf("unknown modifier %q", mod)
		}
	}

	if expand {
		// The patterns are placeholders; their expansions are checked
		// when the atom is compiled for matching.
		return nil
	}

	// Validate static patterns.
	switch patternType {
	case rePattern:
		for i, pat := range atom.Patterns {
			if _, err := regexp.Compile(pat); err != nil {
				return fmt.Errorf("pattern %d: %w", i+1, err)
			}
		}
	case cidr:
		for i, pat := range atom.Patterns {
			if _, err := netip.ParsePrefix(pat); err != nil {
				return fmt.Errorf("pattern %d: %w", i+1, err)
			}
		}
	}
	return nil
}
// expandPatterns returns the patterns the atom should be compiled from.
//
// Unless "expand" is the first modifier, that is atom.Patterns itself.
// Otherwise every pattern of the form "%name%" is replaced
// by the values registered under name in placeholders;
// patterns that are not placeholders and names without values contribute nothing.
func (atom *SearchAtom) expandPatterns(placeholders map[string][]string) []string {
	if len(atom.Modifiers) == 0 || atom.Modifiers[0] != "expand" {
		return atom.Patterns
	}

	var expanded []string
	for i := range atom.Patterns {
		if name, ok := cutPlaceholder(atom.Patterns[i]); ok {
			expanded = append(expanded, placeholders[name]...)
		}
	}
	return expanded
}
// ExprMatches reports whether entry matches the atom.
//
// An atom that fails [SearchAtom.Validate] never matches.
// If atom.Field is empty, the patterns are checked against entry.Message.
// Otherwise they are checked against the value stored in entry.Fields
// under atom.Field: the exact name is tried first,
// then a case-insensitive comparison of the names.
// If several names differ from atom.Field only by case,
// which of them is used is unspecified.
// A field that is not present is treated as the empty string;
// it is never replaced by the message.
//
// opts may be nil, in which case no placeholders are available
// to the "expand" modifier.
func (atom *SearchAtom) ExprMatches(entry *LogEntry, opts *MatchOptions) bool {
	if err := atom.Validate(); err != nil {
		return false
	}

	field := entry.Message
	if atom.Field != "" {
		// A named field must not fall back to the message when it is absent.
		field = ""
		if v, ok := entry.Fields[atom.Field]; ok {
			// Exact match: the cheap, common case.
			field = v
		} else {
			for k, v := range entry.Fields {
				if strings.EqualFold(k, atom.Field) {
					field = v
					break
				}
			}
		}
	}

	var placeholders map[string][]string
	if opts != nil {
		placeholders = opts.Placeholders
	}
	return atom.compile(placeholders).matches(field)
}
// matches reports whether field satisfies the compiled atom.
//
// A matcher with neither regular expressions nor prefixes matches nothing.
// Every regular expression has to match field.
// If there are prefixes, field additionally has to parse as an IP address
// that lies inside at least one of them.
func (atom compiledSearchAtom) matches(field string) bool {
	hasRegexp, hasCIDR := len(atom.regexp) > 0, len(atom.cidr) > 0
	if !hasRegexp && !hasCIDR {
		return false
	}

	// All regular expressions must agree.
	for i := range atom.regexp {
		if !atom.regexp[i].MatchString(field) {
			return false
		}
	}
	if !hasCIDR {
		return true
	}

	// One containing prefix is enough.
	addr, err := netip.ParseAddr(field)
	if err != nil {
		return false
	}
	for i := range atom.cidr {
		if atom.cidr[i].Contains(addr) {
			return true
		}
	}
	return false
}
// compile returns the matcher for the atom's patterns
// after expanding placeholders.
//
// The matcher is cached on the atom and reused for as long as
// lockedCompileUpToDate reports that the cache still fits.
// The kind of matcher depends on the modifiers:
//   - with "cidr", every pattern that parses as an IP prefix becomes a prefix;
//   - with "all", every valid pattern becomes its own regular expression,
//     all of which must match;
//   - otherwise the valid patterns are joined into one alternation.
//
// Patterns that are invalid (which can only be discovered here
// when they come from placeholders) are skipped.
// If nothing valid remains, the returned matcher matches nothing.
func (atom *SearchAtom) compile(placeholders map[string][]string) compiledSearchAtom {
	patterns := atom.expandPatterns(placeholders)
	if len(patterns) == 0 {
		// Can happen if the placeholders are invalid.
		return compiledSearchAtom{}
	}

	// Common case: we already compiled the atom.
	atom.mu.RLock()
	upToDate := atom.lockedCompileUpToDate(patterns)
	cached := atom.compiled
	atom.mu.RUnlock()
	if upToDate {
		return cached
	}

	// Build the new matcher outside the critical section to reduce contention.
	isMessage := atom.isMessage()
	var compiled compiledSearchAtom
	switch {
	case slices.Contains(atom.Modifiers, "cidr"):
		compiled.cidr = make([]netip.Prefix, 0, len(patterns))
		for _, pat := range patterns {
			prefix, err := netip.ParsePrefix(pat)
			if err != nil {
				continue
			}
			compiled.cidr = append(compiled.cidr, prefix)
		}
	case slices.Contains(atom.Modifiers, "all"):
		compiled.regexp = make([]*regexp.Regexp, 0, len(patterns))
		var sb strings.Builder
		for _, pat := range patterns {
			sb.Reset()
			if !appendPatternRegexp(&sb, pat, atom.Modifiers, isMessage) {
				continue
			}
			// Treat a pattern that does not compile like any other invalid
			// pattern instead of panicking inside library code.
			re, err := regexp.Compile(sb.String())
			if err != nil {
				continue
			}
			compiled.regexp = append(compiled.regexp, re)
		}
	default:
		// Collect the valid alternatives first so that an invalid pattern
		// can never leave behind an empty branch ("a|"),
		// which would match every input.
		alternatives := make([]string, 0, len(patterns))
		var sb strings.Builder
		for _, pat := range patterns {
			sb.Reset()
			if appendPatternRegexp(&sb, pat, atom.Modifiers, isMessage) {
				alternatives = append(alternatives, sb.String())
			}
		}
		if len(alternatives) > 0 {
			if re, err := regexp.Compile(strings.Join(alternatives, "|")); err == nil {
				compiled.regexp = []*regexp.Regexp{re}
			}
		}
	}

	// Copy the inputs so later changes by the caller cannot alias the cache key.
	modifiersCopy := slices.Clone(atom.Modifiers)
	patternsCopy := slices.Clone(patterns)

	// Update cache, unless another goroutine already stored an equivalent matcher.
	atom.mu.Lock()
	if !atom.lockedCompileUpToDate(patterns) {
		atom.compiledIsMessage = isMessage
		atom.compiledModifiers = modifiersCopy
		atom.compiledPatterns = patternsCopy
		atom.compiled = compiled
	}
	atom.mu.Unlock()
	return compiled
}
// isMessage reports whether the atom has no field name,
// which means it is matched against the log message.
func (atom *SearchAtom) isMessage() bool {
	return len(atom.Field) == 0
}
// lockedCompileUpToDate reports whether the cached matcher was built
// from the atom's current message/field kind, its current Modifiers,
// and the given (already expanded) patterns.
//
// The caller must hold atom.mu, for reading or writing,
// because the compiled* fields are read here.
func (atom *SearchAtom) lockedCompileUpToDate(patterns []string) bool {
	return atom.isMessage() == atom.compiledIsMessage &&
		slices.Equal(atom.compiledPatterns, patterns) &&
		slices.Equal(atom.compiledModifiers, atom.Modifiers)
}
// appendPatternRegexp writes a regular expression equivalent to pattern
// to the given string builder
// and reports whether it did so.
// It returns false, without writing anything, only when the "re" modifier
// is present and pattern is not a valid regular expression.
//
// The modifiers select the translation:
//   - "re": pattern is used as the regular expression, wrapped in a group.
//   - "base64offset": the alternation of the strings returned by base64permute.
//   - otherwise: a case-insensitive expression for the pattern,
//     which is base64-encoded with "base64", expanded by windashpermute
//     with "windash", and translated by escapePattern in every other case.
//
// In the last form the expression is anchored at the start and at the end
// unless isMessage is true or "contains" is present;
// "endswith" drops the start anchor and "startswith" drops the end anchor.
func appendPatternRegexp(sb *strings.Builder, pattern string, modifiers []string, isMessage bool) bool {
	if slices.Contains(modifiers, "re") {
		if _, err := regexp.Compile(pattern); err != nil {
			return false
		}
		sb.WriteString("(?:")
		sb.WriteString(pattern)
		sb.WriteString(")")
		return true
	}

	if slices.Contains(modifiers, "base64offset") {
		// NOTE(review): the permutations are written verbatim.
		// If base64permute can return text containing '+',
		// it would act as a regexp operator here — confirm that helper's output.
		permutes := base64permute(pattern)
		sb.WriteString("(?:")
		sb.WriteString(strings.Join(permutes, "|"))
		sb.WriteString(")")
		return true
	}

	contains := slices.Contains(modifiers, "contains")
	sb.WriteString("(?i:") // Case-insensitive, non-capturing group.
	if !isMessage && !contains && !slices.Contains(modifiers, "endswith") {
		sb.WriteString("^")
	}
	sb.WriteString("(?:")
	switch {
	case slices.Contains(modifiers, "base64"):
		encoded := base64.RawStdEncoding.EncodeToString([]byte(pattern))
		// The base64 alphabet contains '+', which is a regexp operator,
		// so the encoded text has to be quoted to be matched literally.
		sb.WriteString(regexp.QuoteMeta(encoded))
	case slices.Contains(modifiers, "windash"):
		permutations := windashpermute(pattern)
		for ix, perm := range permutations {
			escapePattern(sb, perm)
			if ix != len(permutations)-1 {
				sb.WriteString("|")
			}
		}
	default:
		escapePattern(sb, pattern)
	}
	sb.WriteString(")")
	if !isMessage && !contains && !slices.Contains(modifiers, "startswith") {
		sb.WriteString("$")
	}
	sb.WriteString(")") // Close non-capturing group.
	return true
}
// escapePattern appends to sb the regular expression for a wildcard pattern.
//
// '?' becomes "." and '*' becomes ".*".
// A backslash followed by '?', '*', or another backslash
// yields that second character as a literal.
// Any other backslash, including one at the very end, is a literal backslash.
// Every remaining byte is passed through appendQuoteMeta.
func escapePattern(sb *strings.Builder, pattern string) {
	n := len(pattern)
	for pos := 0; pos < n; pos++ {
		c := pattern[pos]
		if c == '?' {
			sb.WriteString(".")
			continue
		}
		if c == '*' {
			sb.WriteString(".*")
			continue
		}
		if c != '\\' {
			appendQuoteMeta(sb, pattern[pos:pos+1])
			continue
		}

		// c is a backslash: it only escapes a wildcard or another backslash.
		if pos+1 < n {
			if next := pattern[pos+1]; next == '?' || next == '*' || next == '\\' {
				sb.WriteByte('\\')
				sb.WriteByte(next)
				pos++
				continue
			}
		}
		// "Plain backslash not followed by a wildcard can be expressed as single \".
		sb.WriteString(`\\`)
	}
}
// cutPlaceholder returns the name inside a placeholder of the form "%name%".
// ok is false, and the name empty, unless s has at least two bytes
// and both starts and ends with '%'.
func cutPlaceholder(s string) (_ string, ok bool) {
	n := len(s)
	switch {
	case n < 2:
		return "", false
	case s[0] != '%', s[n-1] != '%':
		return "", false
	}
	return s[1 : n-1], true
}
// Status classifies how stable a [Rule] is.
type Status string

// Known statuses.
const (
	// Stable marks a rule that produced no obvious false positives
	// in multiple environments over a long period of time.
	Stable Status = "stable"
	// Test marks a rule that shows no obvious false positives
	// on a limited set of test systems.
	Test Status = "test"
	// Experimental marks a new rule that has not been tested outside of lab environments
	// and could lead to many false positives.
	Experimental Status = "experimental"
	// Deprecated marks a rule that is to replace or cover another one.
	// The two rules are linked through the related field.
	Deprecated Status = "deprecated"
	// Unsupported marks a rule that cannot be used in its current state
	// (special correlation log, home-made fields, etc.).
	Unsupported Status = "unsupported"
)

// IsKnown reports whether status equals one of the constants declared above.
func (status Status) IsKnown() bool {
	switch status {
	case Stable, Test, Experimental, Deprecated, Unsupported:
		return true
	}
	return false
}
// Level states how critical a triggered [Rule] is.
type Level string

// Known levels.
const (
	// Informational marks a rule meant for enriching events,
	// e.g. by tagging them.
	// Such rules should trigger no case or alert,
	// because a huge amount of events is expected to match them.
	Informational Level = "informational"
	// Low marks a notable event that is rarely an incident.
	// Low rated events can be relevant in high numbers or in combination with others.
	// Immediate reaction shouldn't be necessary, but a regular review is recommended.
	Low Level = "low"
	// Medium marks a relevant event that should be reviewed manually
	// on a more frequent basis.
	Medium Level = "medium"
	// High marks a relevant event that should trigger an internal alert
	// and requires a prompt review.
	High Level = "high"
	// Critical marks a highly relevant event that indicates an incident.
	// Critical events should be reviewed immediately.
	// It is used only for cases in which probability borders certainty.
	Critical Level = "critical"
)

// IsKnown reports whether level equals one of the constants declared above.
func (level Level) IsKnown() bool {
	switch level {
	case Informational, Low, Medium, High, Critical:
		return true
	}
	return false
}
// Relation is a reference to another related rule.
// It is the element type of the Related field of [Rule].
type Relation struct {
// ID identifies the referenced rule.
// NOTE(review): presumably the ID of the other [Rule] — confirm against the parser.
ID string
// Type states how the referencing rule relates to the referenced one.
Type RelationType
}
// RelationType names the kind of link that a [Relation] expresses.
type RelationType string

// Known relation types.
const (
	// Derived means the rule was derived from the referred rule or rules,
	// which may remain active.
	Derived RelationType = "derived"
	// Obsoletes means the rule obsoletes the referred rule or rules,
	// which aren't used anymore.
	Obsoletes RelationType = "obsoletes"
	// Merged means the rule was merged from the referred rules.
	// Those rules may still exist and be in use.
	Merged RelationType = "merged"
	// Renamed means the rule previously had the referred identifier or identifiers
	// but was renamed for whatever reason,
	// e.g. from a private naming scheme to UUIDs, to resolve collisions etc.
	// A rule with the referred id is not expected to exist anymore.
	Renamed RelationType = "renamed"
	// Similar relates similar rules to each other
	// (e.g. same detection content applied to different log sources,
	// rule that is a modified version of another rule with a different level).
	Similar RelationType = "similar"
)

// IsKnown reports whether typ equals one of the constants declared above.
func (typ RelationType) IsKnown() bool {
	switch typ {
	case Derived, Obsoletes, Merged, Renamed, Similar:
		return true
	}
	return false
}