From b57b1c1aa7624ed3386c1a2f2d6d0918fa6c3e83 Mon Sep 17 00:00:00 2001 From: Richard Gomez <32133502+rgmz@users.noreply.github.com> Date: Wed, 18 Oct 2023 17:17:11 -0400 Subject: [PATCH] feat(voiceflow): basic detector (#1900) --- pkg/detectors/voiceflow/voiceflow.go | 99 +++++++ pkg/detectors/voiceflow/voiceflow_test.go | 302 ++++++++++++++++++++++ pkg/engine/defaults.go | 2 + pkg/pb/detectorspb/detectors.pb.go | 8 +- proto/detectors.proto | 1 + 5 files changed, 410 insertions(+), 2 deletions(-) create mode 100644 pkg/detectors/voiceflow/voiceflow.go create mode 100644 pkg/detectors/voiceflow/voiceflow_test.go diff --git a/pkg/detectors/voiceflow/voiceflow.go b/pkg/detectors/voiceflow/voiceflow.go new file mode 100644 index 000000000000..0795abdf497d --- /dev/null +++ b/pkg/detectors/voiceflow/voiceflow.go @@ -0,0 +1,99 @@ +package voiceflow + +import ( + "bytes" + "context" + "fmt" + "io" + "net/http" + "regexp" + "strings" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +type Scanner struct { + client *http.Client +} + +// Ensure the Scanner satisfies the interface at compile time. +var _ detectors.Detector = (*Scanner)(nil) + +var ( + defaultClient = common.SaneHttpClient() + // Reference: https://developer.voiceflow.com/reference/project#dialog-manager-api-keys + // + // TODO: This pattern also matches Workspace (VF.WS.) and Legacy Workspace (VF.) API keys; it has not been validated whether those keys actually work. + // https://github.com/voiceflow/general-runtime/blob/master/tests/runtime/lib/DataAPI/utils.unit.ts + keyPat = regexp.MustCompile(`\b(VF\.(?:(?:DM|WS)\.)?[a-fA-F0-9]{24}\.[a-zA-Z0-9]{16})\b`) +) + +// Keywords are used for efficiently pre-filtering chunks. +// Use identifiers in the secret preferably, or the provider name. 
+func (s Scanner) Keywords() []string {
+	// Every key matched by keyPat starts with "VF."; Dialog Manager keys also contain "DM".
+	return []string{"vf", "dm"}
+}
+
+// FromData will find and optionally verify Voiceflow secrets in a given set of bytes.
+//
+// Each match of keyPat in data produces one Result whose Raw field holds the
+// matched key. When verify is true the key is checked against the Voiceflow
+// API: Verified is set on success, and VerificationError is set when the
+// outcome could not be determined. The returned error is always nil.
+func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
+	dataStr := string(data)
+	matches := keyPat.FindAllStringSubmatch(dataStr, -1)
+
+	for _, match := range matches {
+		if len(match) != 2 {
+			continue
+		}
+		resMatch := strings.TrimSpace(match[1])
+
+		s1 := detectors.Result{
+			DetectorType: detectorspb.DetectorType_Voiceflow,
+			Raw:          []byte(resMatch),
+		}
+
+		if verify {
+			client := s.client
+			if client == nil {
+				client = defaultClient
+			}
+			// A verification failure of any kind is recorded on the result;
+			// the finding itself is never dropped.
+			s1.Verified, s1.VerificationError = verifyMatch(ctx, client, resMatch)
+		}
+
+		results = append(results, s1)
+	}
+
+	return results, nil
+}
+
+// verifyMatch sends a knowledge-base query authorized with token and reports
+// whether the API accepted it.
+//
+// It returns (true, nil) on HTTP 200 and (false, nil) on HTTP 401, which means
+// the key is determinately invalid. Any other status, a failure to build the
+// request, or a transport error returns (false, err) so the caller can tell
+// "invalid" apart from "could not be checked".
+func verifyMatch(ctx context.Context, client *http.Client, token string) (bool, error) {
+	payload := []byte(`{"question": "why is the sky blue?"}`)
+	req, err := http.NewRequestWithContext(ctx, "POST", "https://general-runtime.voiceflow.com/knowledge-base/query", bytes.NewBuffer(payload))
+	if err != nil {
+		return false, err
+	}
+	req.Header.Set("Accept", "application/json")
+	req.Header.Set("Authorization", token)
+	req.Header.Set("Content-Type", "application/json")
+
+	res, err := client.Do(req)
+	if err != nil {
+		return false, err
+	}
+	defer func() {
+		// Drain whatever is left so the transport can reuse the connection.
+		_, _ = io.Copy(io.Discard, res.Body)
+		_ = res.Body.Close()
+	}()
+
+	switch res.StatusCode {
+	case http.StatusOK:
+		return true, nil
+	case http.StatusUnauthorized:
+		// The secret is determinately not verified.
+		return false, nil
+	default:
+		// Include the body in the error when it can be read; otherwise leave it empty.
+		var buf bytes.Buffer
+		var bodyString string
+		if _, err := io.Copy(&buf, res.Body); err == nil {
+			bodyString = buf.String()
+		}
+		return false, fmt.Errorf("unexpected HTTP response [status=%d, body=%s]", res.StatusCode, bodyString)
+	}
+}
+
+// Type returns the detector type identifier for Voiceflow.
+func (s Scanner) Type() detectorspb.DetectorType {
+	return detectorspb.DetectorType_Voiceflow
+}
diff --git a/pkg/detectors/voiceflow/voiceflow_test.go b/pkg/detectors/voiceflow/voiceflow_test.go
new file mode 100644
index 000000000000..b7191a6c46ec
--- /dev/null
+++ 
b/pkg/detectors/voiceflow/voiceflow_test.go @@ -0,0 +1,302 @@ +//go:build detectors +// +build detectors + +package voiceflow + +import ( + "context" + "fmt" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "testing" + "time" + + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestVoiceflow_Pattern(t *testing.T) { + tests := []struct { + name string + data string + shouldMatch bool + match string + }{ + // True positives + // https://github.com/funDAOmental/endlessquest/blob/5c008f7c6a7e58c45a88b72fef4b965c258d665c/Voiceflow/agent-api/index.js#L6 + { + name: `valid_result1`, + data: `// z0MG IT'S NOT A SECRET (but we'll delete it) +const API_KEY = "VF.DM.6469b4e5909a470007b96250.k4ip0SMy84jWlCsF"; // it should look like this: VF.DM.XXXXXXX.XXXXXX... keep this a secret!`, + shouldMatch: true, + match: `VF.DM.6469b4e5909a470007b96250.k4ip0SMy84jWlCsF`, + }, + // https://github.com/sherifButt/ll-site/blob/b98b268214324da42a84e996e4c03c242e122680/src/components/Chatbot.jsx#L14 + { + name: `valid_result2`, + data: ` const runtime = useRuntime({ + verify: { authorization: 'VF.DM.652da078cde70b0008e1c5df.zsIo23VTxNXKfb9f' }, + session: { userID: 'user_123' }, + });`, + shouldMatch: true, + match: `VF.DM.652da078cde70b0008e1c5df.zsIo23VTxNXKfb9f`, + }, + // https://github.com/the-vv/Voiceflow-chatbot/blob/324db17693dd46387ea7a020e92c4e79b94306c6/src/app/chat/chat.component.ts#L27 + { + name: `valid_result3`, + data: ` this.http.delete('https://general-runtime.voiceflow.com/state/user/TEST_USER', { + headers: { + Authorization: "VF.DM.652ecc210267ec00078fc726.ZFPdEwvU0d1jiIMq" + } + }).subscribe(res => { + this.loading = false; + this.doPrompt('', { action: { type: 'launch' } }); + })`, + shouldMatch: true, + match: `VF.DM.652ecc210267ec00078fc726.ZFPdEwvU0d1jiIMq`, + }, + // 
https://github.com/legionX7/Graduation-Project-API/blob/451431771d3fba1d8c634b8855274b414d7aed6d/mainAPI.py#L547 + { + name: `valid_result4`, + data: ` +API_KEY = 'VF.DM.646388eb1419c80007bbbaa4.XHOqETFO3cvTxlGl' +VERSION_ID = '646bc'`, + shouldMatch: true, + match: `VF.DM.646388eb1419c80007bbbaa4.XHOqETFO3cvTxlGl`, + }, + // https://github.com/voiceflow/general-runtime/blob/master/tests/runtime/lib/DataAPI/utils.unit.ts + { + name: `valid_result5`, + data: ` it('extracts ID from a Dialog Manager API key', () => { + // eslint-disable-next-line no-secrets/no-secrets + const key = 'VF.DM.628d5d92faf688001bda7907.dmC8KKO1oX8JO5ai'; + const result = utils.extractAPIKeyID(key); + + expect(result).to.equal('628d5d92faf688001bda7907'); + });`, + shouldMatch: true, + match: `VF.DM.628d5d92faf688001bda7907.dmC8KKO1oX8JO5ai`, + }, + { + name: `valid_result6_legacy`, + data: ` it('extracts ID from a Workspace API key', () => { + // eslint-disable-next-line no-secrets/no-secrets + const key = 'VF.WS.62bcb0cca5184300066f5ac7.egnKyyzZksiS5iGa'; + const result = utils.extractAPIKeyID(key); + + expect(result).to.equal('62bcb0cca5184300066f5ac7'); + }); +`, + shouldMatch: true, + match: `VF.WS.62bcb0cca5184300066f5ac7.egnKyyzZksiS5iGa`, + }, + { + name: `valid_result7_legacy`, + data: ` it('extracts ID from a Legacy Workspace API key', () => { + // eslint-disable-next-line no-secrets/no-secrets + const key = 'VF.62bcb0cca5184300066f5ac7.dmC8KKO1oX8JO5az'; + const result = utils.extractAPIKeyID(key); + + expect(result).to.equal('62bcb0cca5184300066f5ac7'); + });`, + shouldMatch: true, + match: `VF.62bcb0cca5184300066f5ac7.dmC8KKO1oX8JO5az`, + }, + + // False positives + // https://github.com/ImperialCollegeLondon/voiceflow-integration-whatsapp/blob/0f3d6a5638b9acb4989d5bf8e77081cc78e9b976/README.md?plain=1#L155 + { + name: `invalid_result1`, + data: "Now, paste it in your .env file for the **VF_PROJECT_API** variable
\n```VF_PROJECT_API='VF.DM.62xxxxxxxxxxxxxxxxxxxxxxx'```", + shouldMatch: false, + }, + // https://github.com/voiceflow/api-examples/blob/c3d8ba9ee8eced7ec8d241973b1eb0284aaec212/rust/src/main.rs#L5 + { + name: `invalid_result2`, + data: `const API_KEY: &str = "YOUR_API_KEY_HERE"; // it should look like this: VF.DM.XXXXXXX.XXXXXX... keep this a secret!`, + shouldMatch: false, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + s := Scanner{} + + results, err := s.FromData(context.Background(), true, []byte(test.data)) + if err != nil { + t.Errorf("Voiceflow.FromData() error = %v", err) + return + } + + if test.shouldMatch { + if len(results) == 0 { + t.Errorf("%s: did not receive a match for '%v' when one was expected", test.name, test.data) + return + } + expected := test.data + if test.match != "" { + expected = test.match + } + result := results[0] + if result.VerificationError != nil { + t.Logf("VerificationError: %v", result.VerificationError) + } + resultData := string(result.Raw) + if resultData != expected { + t.Errorf("%s: did not receive expected match.\n\texpected: '%s'\n\t actual: '%s'", test.name, expected, resultData) + return + } + } else { + if len(results) > 0 { + t.Errorf("%s: received a match for '%v' when one wasn't wanted", test.name, test.data) + return + } + } + }) + } +} + +func TestVoiceflow_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors5") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("VOICEFLOW") + inactiveSecret := testSecrets.MustGetField("VOICEFLOW_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s Scanner + args args + want []detectors.Result + wantErr bool + wantVerificationErr bool + }{ + { + name: 
"found, verified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a voiceflow secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Voiceflow, + Verified: true, + }, + }, + wantErr: false, + wantVerificationErr: false, + }, + { + name: "found, unverified", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a voiceflow secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Voiceflow, + Verified: false, + }, + }, + wantErr: false, + wantVerificationErr: false, + }, + { + name: "not found", + s: Scanner{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + wantVerificationErr: false, + }, + { + name: "found, would be verified if not for timeout", + s: Scanner{client: common.SaneHttpClientTimeOut(1 * time.Microsecond)}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a voiceflow secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Voiceflow, + Verified: false, + }, + }, + wantErr: false, + wantVerificationErr: true, + }, + { + name: "found, verified but unexpected api surface", + s: Scanner{client: common.ConstantResponseHttpClient(404, "")}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a voiceflow secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_Voiceflow, + Verified: false, + }, + }, + wantErr: false, + wantVerificationErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := 
tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("Voiceflow.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + if (got[i].VerificationError != nil) != tt.wantVerificationErr { + t.Fatalf("wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError) + } + } + ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "VerificationError") + if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" { + t.Errorf("Voiceflow.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := Scanner{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/engine/defaults.go b/pkg/engine/defaults.go index ca027a0a206a..0fe710589faf 100644 --- a/pkg/engine/defaults.go +++ b/pkg/engine/defaults.go @@ -11,6 +11,7 @@ import ( "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/sourcegraphcody" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/tailscale" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/trufflehogenterprise" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/voiceflow" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/abbysale" @@ -1554,6 +1555,7 @@ func DefaultDetectors() []detectors.Detector { &ramp.Scanner{}, &anthropic.Scanner{}, &sourcegraphcody.Scanner{}, + voiceflow.Scanner{}, } } diff --git a/pkg/pb/detectorspb/detectors.pb.go b/pkg/pb/detectorspb/detectors.pb.go index 6dad4af73610..c863b369165a 100644 --- a/pkg/pb/detectorspb/detectors.pb.go +++ 
b/pkg/pb/detectorspb/detectors.pb.go @@ -1008,6 +1008,7 @@ const ( DetectorType_Ramp DetectorType = 934 DetectorType_Klaviyo DetectorType = 935 DetectorType_SourcegraphCody DetectorType = 936 + DetectorType_Voiceflow DetectorType = 937 ) // Enum value maps for DetectorType. @@ -1946,6 +1947,7 @@ var ( 934: "Ramp", 935: "Klaviyo", 936: "SourcegraphCody", + 937: "Voiceflow", } DetectorType_value = map[string]int32{ "Alibaba": 0, @@ -2881,6 +2883,7 @@ var ( "Ramp": 934, "Klaviyo": 935, "SourcegraphCody": 936, + "Voiceflow": 937, } ) @@ -3259,7 +3262,7 @@ var file_detectors_proto_rawDesc = []byte{ 0x44, 0x65, 0x63, 0x6f, 0x64, 0x65, 0x72, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x55, 0x4e, 0x4b, 0x4e, 0x4f, 0x57, 0x4e, 0x10, 0x00, 0x12, 0x09, 0x0a, 0x05, 0x50, 0x4c, 0x41, 0x49, 0x4e, 0x10, 0x01, 0x12, 0x0a, 0x0a, 0x06, 0x42, 0x41, 0x53, 0x45, 0x36, 0x34, 0x10, 0x02, 0x12, - 0x09, 0x0a, 0x05, 0x55, 0x54, 0x46, 0x31, 0x36, 0x10, 0x03, 0x2a, 0xa6, 0x75, 0x0a, 0x0c, 0x44, + 0x09, 0x0a, 0x05, 0x55, 0x54, 0x46, 0x31, 0x36, 0x10, 0x03, 0x2a, 0xb6, 0x75, 0x0a, 0x0c, 0x44, 0x65, 0x74, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x54, 0x79, 0x70, 0x65, 0x12, 0x0b, 0x0a, 0x07, 0x41, 0x6c, 0x69, 0x62, 0x61, 0x62, 0x61, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x41, 0x4d, 0x51, 0x50, 0x10, 0x01, 0x12, 0x07, 0x0a, 0x03, 0x41, 0x57, 0x53, 0x10, 0x02, 0x12, 0x09, 0x0a, 0x05, 0x41, @@ -4198,7 +4201,8 @@ var file_detectors_proto_rawDesc = []byte{ 0x63, 0x10, 0xa5, 0x07, 0x12, 0x09, 0x0a, 0x04, 0x52, 0x61, 0x6d, 0x70, 0x10, 0xa6, 0x07, 0x12, 0x0c, 0x0a, 0x07, 0x4b, 0x6c, 0x61, 0x76, 0x69, 0x79, 0x6f, 0x10, 0xa7, 0x07, 0x12, 0x14, 0x0a, 0x0f, 0x53, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x67, 0x72, 0x61, 0x70, 0x68, 0x43, 0x6f, 0x64, 0x79, - 0x10, 0xa8, 0x07, 0x42, 0x3d, 0x5a, 0x3b, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, + 0x10, 0xa8, 0x07, 0x12, 0x0e, 0x0a, 0x09, 0x56, 0x6f, 0x69, 0x63, 0x65, 0x66, 0x6c, 0x6f, 0x77, + 0x10, 0xa9, 0x07, 0x42, 0x3d, 0x5a, 0x3b, 0x67, 0x69, 0x74, 0x68, 0x75, 
0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x74, 0x72, 0x75, 0x66, 0x66, 0x6c, 0x65, 0x73, 0x65, 0x63, 0x75, 0x72, 0x69, 0x74, 0x79, 0x2f, 0x74, 0x72, 0x75, 0x66, 0x66, 0x6c, 0x65, 0x68, 0x6f, 0x67, 0x2f, 0x76, 0x33, 0x2f, 0x70, 0x6b, 0x67, 0x2f, 0x70, 0x62, 0x2f, 0x64, 0x65, 0x74, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x73, diff --git a/proto/detectors.proto b/proto/detectors.proto index 269916431326..289798075111 100644 --- a/proto/detectors.proto +++ b/proto/detectors.proto @@ -945,6 +945,7 @@ enum DetectorType { Ramp = 934; Klaviyo = 935; SourcegraphCody = 936; + Voiceflow = 937; } message Result {