diff --git a/hack/docs/Adding_Detectors_Internal.md b/hack/docs/Adding_Detectors_Internal.md index 02d4f46e45eb..4b0685bedd8b 100644 --- a/hack/docs/Adding_Detectors_Internal.md +++ b/hack/docs/Adding_Detectors_Internal.md @@ -62,15 +62,31 @@ If you think that something should be included outside of these guidelines, plea 1. Add the test secret to GCP Secrets. See [managing test secrets](#managing-test-secrets) 2. Update the pattern regex and keywords. Try iterating with [regex101.com](http://regex101.com/). 3. Update the verifier code to use a non-destructive API call that can determine whether the secret is valid or not. + * Make sure you understand [verification indeterminacy](#verification-indeterminacy). 4. Update the tests with these test cases at minimum: 1. Found and verified (using a credential loaded from GCP Secrets) - 2. Found and unverified - 3. Not found - 4. Any false positive cases that you come across + 2. Found and unverified (determinately, i.e. the secret is invalid) + 3. Found and unverified (indeterminately due to timeout) + 4. Found and unverified (indeterminately due to an unexpected API response) + 5. Not found + 6. Any false positive cases that you come across 5. Create a merge request for review. CI tests must be passing. ## Addendum +### Verification indeterminacy + +There are two types of reasons that secret verification can fail: +* The candidate secret is not actually a valid secret. +* Something went wrong in the process unrelated to the candidate secret, such as a transient network error or an unexpected API response. + +In Trufflehog parlance, the first type of verification response is called _determinate_ and the second type is called _indeterminate_. Verification code should distinguish between the two by returning an error object in the result struct **only** for indeterminate failures. In general, a verifier should return an error (indicating an indeterminate failure) in all cases that haven't been explicitly identified as determinate failure states. + +For example, consider a hypothetical authentication endpoint that returns `200 OK` for valid credentials and `403 Forbidden` for invalid credentials. The verifier for this endpoint could make an HTTP request and use the response status code to decide what to return: +* A `200` response would indicate that verification succeeded. (Or maybe any `2xx` response.) +* A `403` response would indicate that verification failed **determinately** and no error object should be returned. +* Any other response would indicate that verification failed **indeterminately** and an error object should be returned. + ### Managing Test Secrets Do not embed test credentials in the test code. Instead, use GCP Secrets Manager. diff --git a/hack/docs/Adding_Detectors_external.md b/hack/docs/Adding_Detectors_external.md index 8457463da27a..fc9bf42e05c2 100644 --- a/hack/docs/Adding_Detectors_external.md +++ b/hack/docs/Adding_Detectors_external.md @@ -62,15 +62,31 @@ If you think that something should be included outside of these guidelines, plea 1. Create a [test secrets file, and export the variable](#using-a-test-secret-file) 2. Update the pattern regex and keywords. Try iterating with [regex101.com](http://regex101.com/). 3. Update the verifier code to use a non-destructive API call that can determine whether the secret is valid or not. + * Make sure you understand [verification indeterminacy](#verification-indeterminacy). 4. Update the tests with these test cases at minimum: 1. Found and verified (using a credential loaded from GCP Secrets) - 2. Found and unverified - 3. Not found - 4. Any false positive cases that you come across + 2. Found and unverified (determinately, i.e. the secret is invalid) + 3. Found and unverified (indeterminately due to timeout) + 4. Found and unverified (indeterminately due to an unexpected API response) + 5. Not found + 6. Any false positive cases that you come across 5. Create a pull request for review. ## Addendum +### Verification indeterminacy + +There are two types of reasons that secret verification can fail: +* The candidate secret is not actually a valid secret. +* Something went wrong in the process unrelated to the candidate secret, such as a transient network error or an unexpected API response. + +In Trufflehog parlance, the first type of verification response is called _determinate_ and the second type is called _indeterminate_. Verification code should distinguish between the two by returning an error object in the result struct **only** for indeterminate failures. In general, a verifier should return an error (indicating an indeterminate failure) in all cases that haven't been explicitly identified as determinate failure states. + +For example, consider a hypothetical authentication endpoint that returns `200 OK` for valid credentials and `403 Forbidden` for invalid credentials. The verifier for this endpoint could make an HTTP request and use the response status code to decide what to return: +* A `200` response would indicate that verification succeeded. (Or maybe any `2xx` response.) +* A `403` response would indicate that verification failed **determinately** and no error object should be returned. +* Any other response would indicate that verification failed **indeterminately** and an error object should be returned. + ### Using a test secret file 1. Create a file called `.env` with this env file format: diff --git a/pkg/common/http.go b/pkg/common/http.go index 6b75b46665ee..56e587fde354 100644 --- a/pkg/common/http.go +++ b/pkg/common/http.go @@ -3,6 +3,7 @@ package common import ( "crypto/tls" "crypto/x509" + "io" "net" "net/http" "strings" @@ -75,6 +76,14 @@ func PinnedCertPool() *x509.CertPool { return trustedCerts } +type FakeTransport struct { + CreateResponse func(req *http.Request) (*http.Response, error) +} + +func (t FakeTransport) RoundTrip(req *http.Request) (*http.Response, error) { + return t.CreateResponse(req) +} + type CustomTransport struct { T http.RoundTripper } @@ -91,6 +100,21 @@ func NewCustomTransport(T http.RoundTripper) *CustomTransport { return &CustomTransport{T} } +func ConstantStatusHttpClient(statusCode int) *http.Client { + return &http.Client{ + Timeout: DefaultResponseTimeout, + Transport: FakeTransport{ + CreateResponse: func(req *http.Request) (*http.Response, error) { + return &http.Response{ + Request: req, + Body: io.NopCloser(strings.NewReader("")), + StatusCode: statusCode, + }, nil + }, + }, + } +} + func PinnedRetryableHttpClient() *http.Client { httpClient := retryablehttp.NewClient() httpClient.Logger = nil @@ -152,9 +176,9 @@ func SaneHttpClient() *http.Client { } // SaneHttpClientTimeOut adds a custom timeout for some scanners -func SaneHttpClientTimeOut(timeOutSeconds int64) *http.Client { +func SaneHttpClientTimeOut(timeout time.Duration) *http.Client { httpClient := &http.Client{} - httpClient.Timeout = time.Second * time.Duration(timeOutSeconds) + httpClient.Timeout = timeout httpClient.Transport = NewCustomTransport(nil) return httpClient } diff --git a/pkg/detectors/alchemy/alchemy.go b/pkg/detectors/alchemy/alchemy.go index 09fd02966f18..3834edb8df53 100644 --- a/pkg/detectors/alchemy/alchemy.go +++ b/pkg/detectors/alchemy/alchemy.go @@ -2,6 +2,7 @@ package alchemy import ( "context" + "fmt" "net/http" "regexp" "strings" @@ -11,14 +12,15 @@ import ( "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" ) -type Scanner struct{} +type Scanner struct { + client *http.Client +} // Ensure the Scanner satisfies the interface at compile time. var _ detectors.Detector = (*Scanner)(nil) var ( - client = common.SaneHttpClient() - + defaultClient = common.SaneHttpClient() // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"alchemy"}) + `\b([0-9a-zA-Z]{23}_[0-9a-zA-Z]{8})\b`) ) @@ -47,6 +49,10 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result } if verify { + client := s.client + if client == nil { + client = defaultClient + } req, err := http.NewRequestWithContext(ctx, "GET", "https://eth-mainnet.g.alchemy.com/v2/"+resMatch+"/getNFTs/?owner=vitalik.eth", nil) if err != nil { continue @@ -61,7 +67,13 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result if detectors.IsKnownFalsePositive(resMatch, detectors.DefaultFalsePositives, true) { continue } + + if res.StatusCode != 401 { + s1.VerificationError = fmt.Errorf("request to %v returned unexpected status %d", res.Request.URL, res.StatusCode) + } } + } else { + s1.VerificationError = err } } diff --git a/pkg/detectors/alchemy/alchemy_test.go b/pkg/detectors/alchemy/alchemy_test.go index 04929d4ea2b1..ed2ae7b8dbef 100644 --- a/pkg/detectors/alchemy/alchemy_test.go +++ b/pkg/detectors/alchemy/alchemy_test.go @@ -32,11 +32,12 @@ func TestAlchemy_FromChunk(t *testing.T) { verify bool } tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool + name string + s Scanner + args args + want []detectors.Result + wantErr bool + wantVerificationErr bool }{ { name: "found, verified", @@ -52,7 +53,8 @@ func TestAlchemy_FromChunk(t *testing.T) { Verified: true, }, }, - wantErr: false, + wantErr: false, + wantVerificationErr: false, }, { name: "found, unverified", @@ -68,7 +70,8 @@ func TestAlchemy_FromChunk(t *testing.T) { Verified: false, }, }, - wantErr: false, + wantErr: false, + wantVerificationErr: false, }, { name: "not found", @@ -78,14 +81,48 @@ func TestAlchemy_FromChunk(t *testing.T) { data: []byte("You cannot find the secret within"), verify: true, }, - want: nil, - wantErr: false, + want: nil, + wantErr: false, + wantVerificationErr: false, + }, + { + name: "found, would be verified if not for timeout", + s: Scanner{client: common.SaneHttpClientTimeOut(1 * time.Microsecond)}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a alchemy secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Alchemy, + Verified: false, + }, + }, + wantErr: false, + wantVerificationErr: true, + }, + { + name: "found, verified but unexpected api surface", + s: Scanner{client: common.ConstantStatusHttpClient(404)}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a alchemy secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Alchemy, + Verified: false, + }, + }, + wantErr: false, + wantVerificationErr: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) if (err != nil) != tt.wantErr { t.Errorf("Alchemy.FromData() error = %v, wantErr %v", err, tt.wantErr) return @@ -95,6 +132,10 @@ func TestAlchemy_FromChunk(t *testing.T) { t.Fatalf("no raw secret present: \n %+v", got[i]) } got[i].Raw = nil + if (got[i].VerificationError != nil) != tt.wantVerificationErr { + t.Fatalf("verification error = %v, wantVerificationError %v", got[i].VerificationError, tt.wantVerificationErr) + } + got[i].VerificationError = nil } if diff := pretty.Compare(got, tt.want); diff != "" { t.Errorf("Alchemy.FromData() %s diff: (-got +want)\n%s", tt.name, diff) diff --git a/pkg/detectors/getemail/getemail.go b/pkg/detectors/getemail/getemail.go index 6b75faa6e931..4bf56ee19c40 100644 --- a/pkg/detectors/getemail/getemail.go +++ b/pkg/detectors/getemail/getemail.go @@ -7,6 +7,7 @@ import ( "net/http" "regexp" "strings" + "time" "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" @@ -19,7 +20,7 @@ type Scanner struct{} var _ detectors.Detector = (*Scanner)(nil) var ( - client = common.SaneHttpClientTimeOut(5) + client = common.SaneHttpClientTimeOut(5 * time.Second) // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"getemail"}) + `\b([a-zA-Z0-9-]{20})\b`) diff --git a/pkg/detectors/microsoftteamswebhook/microsoftteamswebhook.go b/pkg/detectors/microsoftteamswebhook/microsoftteamswebhook.go index d1687269f320..0b3e9fa9754f 100644 --- a/pkg/detectors/microsoftteamswebhook/microsoftteamswebhook.go +++ b/pkg/detectors/microsoftteamswebhook/microsoftteamswebhook.go @@ -6,6 +6,7 @@ import ( "net/http" "regexp" "strings" + "time" "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" @@ -18,7 +19,7 @@ type Scanner struct{} var _ detectors.Detector = (*Scanner)(nil) var ( - client = common.SaneHttpClientTimeOut(5) + client = common.SaneHttpClientTimeOut(5 * time.Second) // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. keyPat = regexp.MustCompile(`(https:\/\/[a-zA-Z-0-9]+\.webhook\.office\.com\/webhookb2\/[a-zA-Z-0-9]{8}-[a-zA-Z-0-9]{4}-[a-zA-Z-0-9]{4}-[a-zA-Z-0-9]{4}-[a-zA-Z-0-9]{12}\@[a-zA-Z-0-9]{8}-[a-zA-Z-0-9]{4}-[a-zA-Z-0-9]{4}-[a-zA-Z-0-9]{4}-[a-zA-Z-0-9]{12}\/IncomingWebhook\/[a-zA-Z-0-9]{32}\/[a-zA-Z-0-9]{8}-[a-zA-Z-0-9]{4}-[a-zA-Z-0-9]{4}-[a-zA-Z-0-9]{4}-[a-zA-Z-0-9]{12})`) diff --git a/pkg/detectors/scrapersite/scrapersite.go b/pkg/detectors/scrapersite/scrapersite.go index bbe23122df6e..1295c831a6be 100644 --- a/pkg/detectors/scrapersite/scrapersite.go +++ b/pkg/detectors/scrapersite/scrapersite.go @@ -7,6 +7,7 @@ import ( "net/http" "regexp" "strings" + "time" "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" @@ -19,7 +20,7 @@ type Scanner struct{} var _ detectors.Detector = (*Scanner)(nil) var ( - client = common.SaneHttpClientTimeOut(10) + client = common.SaneHttpClientTimeOut(10 * time.Second) // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"scrapersite"}) + `\b([a-zA-Z0-9]{45})\b`) diff --git a/pkg/detectors/screenshotlayer/screenshotlayer.go b/pkg/detectors/screenshotlayer/screenshotlayer.go index 4bd0ab8563d0..e06f7291fd28 100644 --- a/pkg/detectors/screenshotlayer/screenshotlayer.go +++ b/pkg/detectors/screenshotlayer/screenshotlayer.go @@ -7,6 +7,7 @@ import ( "net/http" "regexp" "strings" + "time" "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" @@ -19,7 +20,7 @@ type Scanner struct{} var _ detectors.Detector = (*Scanner)(nil) var ( - client = common.SaneHttpClientTimeOut(10) + client = common.SaneHttpClientTimeOut(10 * time.Second) // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"screenshotlayer"}) + `\b([a-zA-Z0-9_]{32})\b`) diff --git a/pkg/detectors/zenserp/zenserp.go b/pkg/detectors/zenserp/zenserp.go index ebf36bf9ef22..5ae78e9458b1 100644 --- a/pkg/detectors/zenserp/zenserp.go +++ b/pkg/detectors/zenserp/zenserp.go @@ -6,6 +6,7 @@ import ( "net/http" "regexp" "strings" + "time" "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" @@ -18,7 +19,7 @@ type Scanner struct{} var _ detectors.Detector = (*Scanner)(nil) var ( - client = common.SaneHttpClientTimeOut(5) + client = common.SaneHttpClientTimeOut(5 * time.Second) // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"zenserp"}) + `\b([0-9a-z-]{36})\b`)