get-woke · cognitivegears · Aug 12, 2021 · Aug 12, 2021 · Sep 5, 2021 · Sep 5, 2021
diff --git a/docs/rules.md b/docs/rules.md
@@ -13,6 +13,7 @@ rules:
       - white-list
     alternatives:
       - allowlist
+    # regex: regexterm
     note: An optional description why these terms are not inclusive. It can be optionally included in the output message.
     # options:
     #   word_boundary: false
@@ -27,6 +28,13 @@ A set of default rules is provided in [`pkg/rule/default.yaml`]({{config.repo_ur
 !!! tip
     If you copy these rules into your config file, be sure to put them under the `rules:` key.
 
+## `regex`
+
+Allows the definition of a regular expression (regex) directly. If specified,
+any terms in the rule definition as well as word boundary options are ignored.
+This is an advanced feature. Only use non-capturing groups in patterns.
+Look-around assertions are not supported.
+
 ## Options
 
 You can configure options for each rule. Add an `options` key to your rule definition to customize.

diff --git a/docs/snippets/woke.md b/docs/snippets/woke.md
@@ -7,6 +7,7 @@ Check for usage of non-inclusive language in your code and provide alternatives
 
 ### Synopsis
 
+
 woke is a linter that will check your source code for usage of non-inclusive
 language and provide suggestions for alternatives. Rules can be customized
 to suit your needs.
@@ -29,4 +30,4 @@ woke [globs ...] [flags]
       --stdin               Read from stdin
 ```
 
-###### Auto generated by spf13/cobra on 13-Aug-2021
+###### Auto generated by spf13/cobra on 5-Sep-2021
diff --git a/docs/usage.md b/docs/usage.md
@@ -65,6 +65,7 @@ The following fields are supported, depending on format:
 | alternative  | List of alternative terms to use instead          |
 | note         | Note about reasoning for inclusion                |
 | severity     | From config, one of "error", "warning", or "info" |
+| regex        | Optional regular expression defined by rule       |
 | optionbool   | Option value, true or false                       |
 | linecontents | Contents of the line with finding                 |
 | lineno       | Line number, 1 based                              |
@@ -141,6 +142,7 @@ Outputs the results as a series of [`json`](https://www.json.org/json-en.html) f
           "<alternative>",
           ...
         ],
+        "Regex": "<regex>",
         "Note": "<note>",
         "Severity": "<severity>",
         "Options": {

diff --git a/pkg/printer/json_test.go b/pkg/printer/json_test.go
@@ -12,7 +12,7 @@ func TestJSON_Print_JSON(t *testing.T) {
 	res := generateFileResult()
 	p := NewJSON(buf)
 	assert.NoError(t, p.Print(res))
-	expected := "{\"Filename\":\"foo.txt\",\"Results\":[{\"Rule\":{\"Name\":\"whitelist\",\"Terms\":[\"whitelist\",\"white-list\",\"whitelisted\",\"white-listed\"],\"Alternatives\":[\"allowlist\"],\"Note\":\"\",\"Severity\":\"warning\",\"Options\":{\"WordBoundary\":false,\"WordBoundaryStart\":false,\"WordBoundaryEnd\":false,\"IncludeNote\":null,\"Categories\":null}},\"Finding\":\"whitelist\",\"Line\":\"this whitelist must change\",\"StartPosition\":{\"Filename\":\"foo.txt\",\"Offset\":0,\"Line\":1,\"Column\":6},\"EndPosition\":{\"Filename\":\"foo.txt\",\"Offset\":0,\"Line\":1,\"Column\":15},\"Reason\":\"`whitelist` may be insensitive, use `allowlist` instead\"}]}\n"
+	expected := "{\"Filename\":\"foo.txt\",\"Results\":[{\"Rule\":{\"Name\":\"whitelist\",\"Terms\":[\"whitelist\",\"white-list\",\"whitelisted\",\"white-listed\"],\"Alternatives\":[\"allowlist\"],\"Regex\":\"\",\"Note\":\"\",\"Severity\":\"warning\",\"Options\":{\"WordBoundary\":false,\"WordBoundaryStart\":false,\"WordBoundaryEnd\":false,\"IncludeNote\":null,\"Categories\":null}},\"Finding\":\"whitelist\",\"Line\":\"this whitelist must change\",\"StartPosition\":{\"Filename\":\"foo.txt\",\"Offset\":0,\"Line\":1,\"Column\":6},\"EndPosition\":{\"Filename\":\"foo.txt\",\"Offset\":0,\"Line\":1,\"Column\":15},\"Reason\":\"`whitelist` may be insensitive, use `allowlist` instead\"}]}\n"
 	got := buf.String()
 	assert.Equal(t, expected, got)
 }
@@ -57,6 +57,6 @@ func TestJSON_Multiple(t *testing.T) {
 	p.End()
 	got := buf.String()
 
-	expected := "{\"Filename\":\"foo.txt\",\"Results\":[{\"Rule\":{\"Name\":\"whitelist\",\"Terms\":[\"whitelist\",\"white-list\",\"whitelisted\",\"white-listed\"],\"Alternatives\":[\"allowlist\"],\"Note\":\"\",\"Severity\":\"warning\",\"Options\":{\"WordBoundary\":false,\"WordBoundaryStart\":false,\"WordBoundaryEnd\":false,\"IncludeNote\":null,\"Categories\":null}},\"Finding\":\"whitelist\",\"Line\":\"this whitelist must change\",\"StartPosition\":{\"Filename\":\"foo.txt\",\"Offset\":0,\"Line\":1,\"Column\":6},\"EndPosition\":{\"Filename\":\"foo.txt\",\"Offset\":0,\"Line\":1,\"Column\":15},\"Reason\":\"`whitelist` may be insensitive, use `allowlist` instead\"}]}\n{\"Filename\":\"bar.txt\",\"Results\":[{\"Rule\":{\"Name\":\"slave\",\"Terms\":[\"slave\"],\"Alternatives\":[\"follower\"],\"Note\":\"\",\"Severity\":\"error\",\"Options\":{\"WordBoundary\":false,\"WordBoundaryStart\":false,\"WordBoundaryEnd\":false,\"IncludeNote\":null,\"Categories\":null}},\"Finding\":\"slave\",\"Line\":\"this slave term must change\",\"StartPosition\":{\"Filename\":\"bar.txt\",\"Offset\":0,\"Line\":1,\"Column\":6},\"EndPosition\":{\"Filename\":\"bar.txt\",\"Offset\":0,\"Line\":1,\"Column\":15},\"Reason\":\"`slave` may be insensitive, use `follower` instead\"}]}\n{\"Filename\":\"barfoo.txt\",\"Results\":[{\"Rule\":{\"Name\":\"test\",\"Terms\":[\"test\"],\"Alternatives\":[\"alternative\"],\"Note\":\"\",\"Severity\":\"info\",\"Options\":{\"WordBoundary\":false,\"WordBoundaryStart\":false,\"WordBoundaryEnd\":false,\"IncludeNote\":null,\"Categories\":null}},\"Finding\":\"test\",\"Line\":\"this test must change\",\"StartPosition\":{\"Filename\":\"barfoo.txt\",\"Offset\":0,\"Line\":1,\"Column\":6},\"EndPosition\":{\"Filename\":\"barfoo.txt\",\"Offset\":0,\"Line\":1,\"Column\":15},\"Reason\":\"`test` may be insensitive, use `alternative` instead\"}]}\n"
+	expected := "{\"Filename\":\"foo.txt\",\"Results\":[{\"Rule\":{\"Name\":\"whitelist\",\"Terms\":[\"whitelist\",\"white-list\",\"whitelisted\",\"white-listed\"],\"Alternatives\":[\"allowlist\"],\"Regex\":\"\",\"Note\":\"\",\"Severity\":\"warning\",\"Options\":{\"WordBoundary\":false,\"WordBoundaryStart\":false,\"WordBoundaryEnd\":false,\"IncludeNote\":null,\"Categories\":null}},\"Finding\":\"whitelist\",\"Line\":\"this whitelist must change\",\"StartPosition\":{\"Filename\":\"foo.txt\",\"Offset\":0,\"Line\":1,\"Column\":6},\"EndPosition\":{\"Filename\":\"foo.txt\",\"Offset\":0,\"Line\":1,\"Column\":15},\"Reason\":\"`whitelist` may be insensitive, use `allowlist` instead\"}]}\n{\"Filename\":\"bar.txt\",\"Results\":[{\"Rule\":{\"Name\":\"slave\",\"Terms\":[\"slave\"],\"Alternatives\":[\"follower\"],\"Regex\":\"\",\"Note\":\"\",\"Severity\":\"error\",\"Options\":{\"WordBoundary\":false,\"WordBoundaryStart\":false,\"WordBoundaryEnd\":false,\"IncludeNote\":null,\"Categories\":null}},\"Finding\":\"slave\",\"Line\":\"this slave term must change\",\"StartPosition\":{\"Filename\":\"bar.txt\",\"Offset\":0,\"Line\":1,\"Column\":6},\"EndPosition\":{\"Filename\":\"bar.txt\",\"Offset\":0,\"Line\":1,\"Column\":15},\"Reason\":\"`slave` may be insensitive, use `follower` instead\"}]}\n{\"Filename\":\"barfoo.txt\",\"Results\":[{\"Rule\":{\"Name\":\"test\",\"Terms\":[\"test\"],\"Alternatives\":[\"alternative\"],\"Regex\":\"\",\"Note\":\"\",\"Severity\":\"info\",\"Options\":{\"WordBoundary\":false,\"WordBoundaryStart\":false,\"WordBoundaryEnd\":false,\"IncludeNote\":null,\"Categories\":null}},\"Finding\":\"test\",\"Line\":\"this test must change\",\"StartPosition\":{\"Filename\":\"barfoo.txt\",\"Offset\":0,\"Line\":1,\"Column\":6},\"EndPosition\":{\"Filename\":\"barfoo.txt\",\"Offset\":0,\"Line\":1,\"Column\":15},\"Reason\":\"`test` may be insensitive, use `alternative` instead\"}]}\n"
 	assert.Equal(t, expected, got)
 }
diff --git a/pkg/rule/rule.go b/pkg/rule/rule.go
@@ -17,6 +17,7 @@ type Rule struct {
 	Name         string   `yaml:"name"`
 	Terms        []string `yaml:"terms"`
 	Alternatives []string `yaml:"alternatives"`
+	Regex        string   `yaml:"regex"`
 	Note         string   `yaml:"note"`
 	Severity     Severity `yaml:"severity"`
 	Options      Options  `yaml:"options"`
@@ -81,8 +82,12 @@ func (r *Rule) SetOptions(o Options) {
 }
 
 func (r *Rule) setRegex() {
-	group := strings.Join(escape(r.Terms), "|")
-	r.re = regexp.MustCompile(fmt.Sprintf(r.regexString(), group))
+	if len(r.Regex) != 0 {
+		r.re = regexp.MustCompile(fmt.Sprintf(`(%s)`, r.Regex))
+	} else {
+		group := strings.Join(escape(r.Terms), "|")
+		r.re = regexp.MustCompile(fmt.Sprintf(r.regexString(), group))
+	}
 }
 
 func (r *Rule) regexString() string {
@@ -223,7 +228,7 @@ func maskInlineIgnore(line string) string {
 // which is helpful for disabling default rules. Eventually, there should be a better
 // way to disable a default rule, and then, if a rule has no Terms, it falls back to the Name.
 func (r *Rule) Disabled() bool {
-	return len(r.Terms) == 0
+	return len(r.Terms) == 0 && len(r.Regex) == 0
 }
 
 // SetIncludeNote populates IncludeNote attributte in Options

diff --git a/pkg/rule/rule_test.go b/pkg/rule/rule_test.go
@@ -21,6 +21,25 @@ func testRule() Rule {
 	}
 }
 
+func testRegexRule() Rule {
+	return Rule{
+		Name:         "ruleregex",
+		Regex:        `\d+`,
+		Alternatives: []string{"alt-regex1", "alt-regex-1"},
+		Severity:     SevWarn,
+	}
+}
+
+func testInvalidRegexRule() Rule {
+	r := Rule{
+		Name:         "invalidrule",
+		Regex:        `(`,
+		Alternatives: []string{"alt-rule1", "alt-rule-1"},
+		Severity:     SevWarn,
+	}
+	return r
+}
+
 func TestRule_FindMatchIndexes(t *testing.T) {
 	tests := []struct {
 		text       string
@@ -49,6 +68,39 @@ func TestRule_FindMatchIndexes(t *testing.T) {
 	assert.Equal(t, [][]int(nil), e.FindMatchIndexes("rule1"))
 }
 
+func TestRule_InvalidRegexRule(t *testing.T) {
+	// turn off the panic
+	defer func() { _ = recover() }()
+
+	r := testInvalidRegexRule()
+
+	// Verify rule is compiled - should panic
+	r.setRegex()
+
+	// Validate that terms are now empty / rule is disabled
+	t.Errorf("Invalid rule should have panicked")
+}
+
+func TestRule_FindMatchRegexIndexes(t *testing.T) {
+	tests := []struct {
+		text       string
+		expectedRe [][]int
+	}{
+		{"this string has 123456 and 56789 included", [][]int{{16, 22}, {27, 32}}},
+		{"this string does not have any findings", [][]int(nil)},
+		{`this string has finding with \d+ \d+`, [][]int(nil)},
+	}
+
+	for _, test := range tests {
+		r := testRegexRule()
+		got := r.FindMatchIndexes(test.text)
+		assert.Equal(t, test.expectedRe, got)
+	}
+
+	e := Rule{Name: "rule1"}
+	assert.Equal(t, [][]int(nil), e.FindMatchIndexes("rule1"))
+}
+
 func TestRule_Reason(t *testing.T) {
 	r := testRule()
 	assert.Equal(t, "`rule-1` may be insensitive, use `alt-rule1`, `alt-rule-1` instead", r.Reason("rule-1"))
@@ -133,6 +185,11 @@ func TestRule_regexString(t *testing.T) {
 			rule:     testRuleWithOptions(Options{WordBoundary: true}),
 			expected: `(?i)\b(%s)\b`,
 		},
+		{
+			desc:     "regex rule",
+			rule:     testRegexRule(),
+			expected: `(?i)(%s)`,
+		},
 		{
 			desc:     "word boundary start",
 			rule:     testRuleWithOptions(Options{WordBoundaryStart: true}),