get-woke · cognitivegears · Aug 12, 2021 · Aug 12, 2021 · Sep 5, 2021 · Sep 5, 2021
diff --git a/docs/rules.md b/docs/rules.md
@@ -19,6 +19,7 @@ rules:
     #   word_boundary_start: false
     #   word_boundary_end: false
     #   include_note: false
+    #   regex_terms: false
 ```
 
 A set of default rules is provided in [`pkg/rule/default.yaml`]({{config.repo_url}}blob/main/pkg/rule/default.yaml).
@@ -60,6 +61,14 @@ You can configure options for each rule. Add an `options` key to your rule defin
 * If `false`, the rule note will not be included in the output message
 * If `not set`, `include_note` in your `woke` config file (ie `.woke.yml`) regulates if the note should be included in the output message (default: `false`).
 
+### `regex_terms`
+
+:octicons-milestone-24: Default: `false`
+
+* If `true`, terms will be evaluated as regular expressions
+* If `false`, terms will be treated as plain-text values
+* **NOTE**: This is an advanced feature. A rule whose terms do not compile as a regular expression is disabled and skipped. Use only non-capturing groups (`(?:...)`) in patterns. Look-around assertions are not supported.
+
 ## Disabling Default Rules
 
 You can disable default rules by providing a rule in your `woke` config file (ie `.woke.yml`), with no terms or alternatives.

diff --git a/docs/usage.md b/docs/usage.md
@@ -168,6 +168,7 @@ Outputs the results as a series of [`json`](https://www.json.org/json-en.html) f
           "WordBoundary": <optionbool>,
           "WordBoundaryStart": <optionbool>,
           "WordBoundaryEnd": <optionbool>,
+          "RegexTerms": <optionbool>,
           "IncludeNote": <optionbool>
         }
       },

diff --git a/pkg/printer/json_test.go b/pkg/printer/json_test.go
@@ -12,7 +12,7 @@ func TestJSON_Print_JSON(t *testing.T) {
 	res := generateFileResult()
 	p := NewJSON(buf)
 	assert.NoError(t, p.Print(res))
-	expected := "{\"Filename\":\"foo.txt\",\"Results\":[{\"Rule\":{\"Name\":\"whitelist\",\"Terms\":[\"whitelist\",\"white-list\",\"whitelisted\",\"white-listed\"],\"Alternatives\":[\"allowlist\"],\"Note\":\"\",\"Severity\":\"warning\",\"Options\":{\"WordBoundary\":false,\"WordBoundaryStart\":false,\"WordBoundaryEnd\":false,\"IncludeNote\":null}},\"Finding\":\"whitelist\",\"Line\":\"this whitelist must change\",\"StartPosition\":{\"Filename\":\"foo.txt\",\"Offset\":0,\"Line\":1,\"Column\":6},\"EndPosition\":{\"Filename\":\"foo.txt\",\"Offset\":0,\"Line\":1,\"Column\":15},\"Reason\":\"`whitelist` may be insensitive, use `allowlist` instead\"}]}\n"
+	expected := "{\"Filename\":\"foo.txt\",\"Results\":[{\"Rule\":{\"Name\":\"whitelist\",\"Terms\":[\"whitelist\",\"white-list\",\"whitelisted\",\"white-listed\"],\"Alternatives\":[\"allowlist\"],\"Note\":\"\",\"Severity\":\"warning\",\"Options\":{\"WordBoundary\":false,\"WordBoundaryStart\":false,\"WordBoundaryEnd\":false,\"RegexTerms\":false,\"IncludeNote\":null}},\"Finding\":\"whitelist\",\"Line\":\"this whitelist must change\",\"StartPosition\":{\"Filename\":\"foo.txt\",\"Offset\":0,\"Line\":1,\"Column\":6},\"EndPosition\":{\"Filename\":\"foo.txt\",\"Offset\":0,\"Line\":1,\"Column\":15},\"Reason\":\"`whitelist` may be insensitive, use `allowlist` instead\"}]}\n"
 	got := buf.String()
 	assert.Equal(t, expected, got)
 }
@@ -57,6 +57,6 @@ func TestJSON_Multiple(t *testing.T) {
 	p.End()
 	got := buf.String()
 
-	expected := "{\"Filename\":\"foo.txt\",\"Results\":[{\"Rule\":{\"Name\":\"whitelist\",\"Terms\":[\"whitelist\",\"white-list\",\"whitelisted\",\"white-listed\"],\"Alternatives\":[\"allowlist\"],\"Note\":\"\",\"Severity\":\"warning\",\"Options\":{\"WordBoundary\":false,\"WordBoundaryStart\":false,\"WordBoundaryEnd\":false,\"IncludeNote\":null}},\"Finding\":\"whitelist\",\"Line\":\"this whitelist must change\",\"StartPosition\":{\"Filename\":\"foo.txt\",\"Offset\":0,\"Line\":1,\"Column\":6},\"EndPosition\":{\"Filename\":\"foo.txt\",\"Offset\":0,\"Line\":1,\"Column\":15},\"Reason\":\"`whitelist` may be insensitive, use `allowlist` instead\"}]}\n{\"Filename\":\"bar.txt\",\"Results\":[{\"Rule\":{\"Name\":\"slave\",\"Terms\":[\"slave\"],\"Alternatives\":[\"follower\"],\"Note\":\"\",\"Severity\":\"error\",\"Options\":{\"WordBoundary\":false,\"WordBoundaryStart\":false,\"WordBoundaryEnd\":false,\"IncludeNote\":null}},\"Finding\":\"slave\",\"Line\":\"this slave term must change\",\"StartPosition\":{\"Filename\":\"bar.txt\",\"Offset\":0,\"Line\":1,\"Column\":6},\"EndPosition\":{\"Filename\":\"bar.txt\",\"Offset\":0,\"Line\":1,\"Column\":15},\"Reason\":\"`slave` may be insensitive, use `follower` instead\"}]}\n{\"Filename\":\"barfoo.txt\",\"Results\":[{\"Rule\":{\"Name\":\"test\",\"Terms\":[\"test\"],\"Alternatives\":[\"alternative\"],\"Note\":\"\",\"Severity\":\"info\",\"Options\":{\"WordBoundary\":false,\"WordBoundaryStart\":false,\"WordBoundaryEnd\":false,\"IncludeNote\":null}},\"Finding\":\"test\",\"Line\":\"this test must change\",\"StartPosition\":{\"Filename\":\"barfoo.txt\",\"Offset\":0,\"Line\":1,\"Column\":6},\"EndPosition\":{\"Filename\":\"barfoo.txt\",\"Offset\":0,\"Line\":1,\"Column\":15},\"Reason\":\"`test` may be insensitive, use `alternative` instead\"}]}\n"
+	expected := "{\"Filename\":\"foo.txt\",\"Results\":[{\"Rule\":{\"Name\":\"whitelist\",\"Terms\":[\"whitelist\",\"white-list\",\"whitelisted\",\"white-listed\"],\"Alternatives\":[\"allowlist\"],\"Note\":\"\",\"Severity\":\"warning\",\"Options\":{\"WordBoundary\":false,\"WordBoundaryStart\":false,\"WordBoundaryEnd\":false,\"RegexTerms\":false,\"IncludeNote\":null}},\"Finding\":\"whitelist\",\"Line\":\"this whitelist must change\",\"StartPosition\":{\"Filename\":\"foo.txt\",\"Offset\":0,\"Line\":1,\"Column\":6},\"EndPosition\":{\"Filename\":\"foo.txt\",\"Offset\":0,\"Line\":1,\"Column\":15},\"Reason\":\"`whitelist` may be insensitive, use `allowlist` instead\"}]}\n{\"Filename\":\"bar.txt\",\"Results\":[{\"Rule\":{\"Name\":\"slave\",\"Terms\":[\"slave\"],\"Alternatives\":[\"follower\"],\"Note\":\"\",\"Severity\":\"error\",\"Options\":{\"WordBoundary\":false,\"WordBoundaryStart\":false,\"WordBoundaryEnd\":false,\"RegexTerms\":false,\"IncludeNote\":null}},\"Finding\":\"slave\",\"Line\":\"this slave term must change\",\"StartPosition\":{\"Filename\":\"bar.txt\",\"Offset\":0,\"Line\":1,\"Column\":6},\"EndPosition\":{\"Filename\":\"bar.txt\",\"Offset\":0,\"Line\":1,\"Column\":15},\"Reason\":\"`slave` may be insensitive, use `follower` instead\"}]}\n{\"Filename\":\"barfoo.txt\",\"Results\":[{\"Rule\":{\"Name\":\"test\",\"Terms\":[\"test\"],\"Alternatives\":[\"alternative\"],\"Note\":\"\",\"Severity\":\"info\",\"Options\":{\"WordBoundary\":false,\"WordBoundaryStart\":false,\"WordBoundaryEnd\":false,\"RegexTerms\":false,\"IncludeNote\":null}},\"Finding\":\"test\",\"Line\":\"this test must change\",\"StartPosition\":{\"Filename\":\"barfoo.txt\",\"Offset\":0,\"Line\":1,\"Column\":6},\"EndPosition\":{\"Filename\":\"barfoo.txt\",\"Offset\":0,\"Line\":1,\"Column\":15},\"Reason\":\"`test` may be insensitive, use `alternative` instead\"}]}\n"
 	assert.Equal(t, expected, got)
 }
diff --git a/pkg/rule/options.go b/pkg/rule/options.go
@@ -5,5 +5,6 @@ type Options struct {
 	WordBoundary      bool  `yaml:"word_boundary"`
 	WordBoundaryStart bool  `yaml:"word_boundary_start"`
 	WordBoundaryEnd   bool  `yaml:"word_boundary_end"`
+	RegexTerms        bool  `yaml:"regex_terms"`
 	IncludeNote       *bool `yaml:"include_note"`
 }
diff --git a/pkg/rule/rule.go b/pkg/rule/rule.go
@@ -5,6 +5,8 @@ import (
 	"regexp"
 	"strings"
 
+	"github.com/rs/zerolog/log"
+
 	"github.com/get-woke/woke/pkg/util"
 )
 
@@ -81,8 +83,13 @@ func (r *Rule) SetOptions(o Options) {
 }
 
 func (r *Rule) setRegex() {
-	group := strings.Join(escape(r.Terms), "|")
-	r.re = regexp.MustCompile(fmt.Sprintf(r.regexString(), group))
+	var err error
+	group := strings.Join(escape(r, r.Terms), "|")
+	r.re, err = regexp.Compile(fmt.Sprintf(r.regexString(), group)) // Compile rather than MustCompile: an invalid pattern is reported below instead of panicking
+	if err != nil {
+		log.Error().Err(err).Str("Rule", r.Name).Msg("Unable to compile regular expression, disabling rule")
+		r.Terms = nil // Disable the rule by clearing its terms
+	}
 }
 
 func (r *Rule) regexString() string {
@@ -189,9 +196,13 @@ func IsDirectiveOnlyLine(line string) bool {
 	return !util.ContainsAlphanumeric(leftText)
 }
 
-func escape(ss []string) []string {
+func escape(r *Rule, ss []string) []string { // escape prepares terms for the regex; ss is rewritten in place and returned
 	for i, s := range ss {
-		ss[i] = regexp.QuoteMeta(s)
+		if r.Options.RegexTerms {
+			ss[i] = s // regex terms are kept verbatim
+		} else {
+			ss[i] = regexp.QuoteMeta(s) // plain-text terms: escape regex metacharacters
+		}
 	}
 	return ss
 }

diff --git a/pkg/rule/rule_test.go b/pkg/rule/rule_test.go
@@ -21,6 +21,32 @@ func testRule() Rule {
 	}
 }
 
+func testRegexRuleWithOptions(o Options) Rule { // testRegexRuleWithOptions returns the testRegexRule fixture with options o applied
+	r := testRegexRule()
+	r.SetOptions(o)
+	return r
+}
+
+func testRegexRule() Rule { // testRegexRule returns a fixture rule whose only term is the pattern \d+
+	return Rule{
+		Name:         "ruleregex",
+		Terms:        []string{`\d+`}, // matched as a pattern only when RegexTerms is set; otherwise as the literal text
+		Alternatives: []string{"alt-regex1", "alt-regex-1"},
+		Severity:     SevWarn,
+	}
+}
+
+func testInvalidRegexRule() Rule { // testInvalidRegexRule returns a fixture rule whose term cannot compile as a regex
+	r := Rule{
+		Name:         "invalidrule",
+		Terms:        []string{"("}, // unbalanced parenthesis: not a valid regular expression
+		Alternatives: []string{"alt-rule1", "alt-rule-1"},
+		Severity:     SevWarn,
+	}
+	r.SetOptions(Options{RegexTerms: true}) // RegexTerms so the term is used as a pattern rather than escaped
+	return r
+}
+
 func TestRule_FindMatchIndexes(t *testing.T) {
 	tests := []struct {
 		text       string
@@ -49,6 +75,43 @@ func TestRule_FindMatchIndexes(t *testing.T) {
 	assert.Equal(t, [][]int(nil), e.FindMatchIndexes("rule1"))
 }
 
+func TestRule_InvalidRegexRule(t *testing.T) {
+	r := testInvalidRegexRule()
+
+	// Trigger regex compilation (may already have run via SetOptions - TODO confirm)
+	r.setRegex()
+
+	// Validate that terms are now empty / rule is disabled
+	assert.Empty(t, r.Terms)
+	assert.True(t, r.Disabled())
+}
+
+func TestRule_FindMatchRegexIndexes(t *testing.T) {
+	tests := []struct {
+		text       string
+		expected   [][]int // match indexes when the term is treated as plain text
+		expectedRe [][]int // match indexes when RegexTerms is true
+	}{
+		{"this string has 123456 and 56789 included", [][]int(nil), [][]int{{16, 22}, {27, 32}}},
+		{"this string does not have any findings", [][]int(nil), [][]int(nil)},
+		{`this string has finding with \d+ \d+`, [][]int{{29, 32}, {33, 36}}, [][]int(nil)},
+	}
+	for _, test := range tests {
+		r := testRegexRule() // Default to non regular expression matching
+		got := r.FindMatchIndexes(test.text)
+		assert.Equal(t, test.expected, got)
+	}
+
+	for _, test := range tests {
+		r := testRegexRuleWithOptions(Options{RegexTerms: true}) // Regular expression matching enabled
+		got := r.FindMatchIndexes(test.text)
+		assert.Equal(t, test.expectedRe, got)
+	}
+
+	e := Rule{Name: "rule1"} // A rule with no terms is expected to yield no matches
+	assert.Equal(t, [][]int(nil), e.FindMatchIndexes("rule1"))
+}
+
 func TestRule_Reason(t *testing.T) {
 	r := testRule()
 	assert.Equal(t, "`rule-1` may be insensitive, use `alt-rule1`, `alt-rule-1` instead", r.Reason("rule-1"))
@@ -133,6 +196,11 @@ func TestRule_regexString(t *testing.T) {
 			rule:     testRuleWithOptions(Options{WordBoundary: true}),
 			expected: `(?i)\b(%s)\b`,
 		},
+		{
+			desc:     "regex rule",
+			rule:     testRegexRuleWithOptions(Options{RegexTerms: true}),
+			expected: `(?i)(%s)`,
+		},
 		{
 			desc:     "word boundary start",
 			rule:     testRuleWithOptions(Options{WordBoundaryStart: true}),