From f2f62abd262725cf495560e3ddf2c1e805c7bb68 Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sun, 7 Jan 2024 14:28:16 -0800 Subject: [PATCH 01/29] Extend memory cache to allow for configuring custom expiration and purge interval --- pkg/cache/memory/memory.go | 55 +++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 12 deletions(-) diff --git a/pkg/cache/memory/memory.go b/pkg/cache/memory/memory.go index b4e9c87c3cfd..5be629109894 100644 --- a/pkg/cache/memory/memory.go +++ b/pkg/cache/memory/memory.go @@ -10,33 +10,64 @@ import ( ) const ( - expirationInterval = 12 * time.Hour - purgeInterval = 13 * time.Hour - defaultExpiration = cache.DefaultExpiration + defaultExpirationInterval = 12 * time.Hour + defaultPurgeInterval = 13 * time.Hour + defaultExpiration = cache.DefaultExpiration ) -// Cache is a wrapper around the go-cache library. +// Cache wraps the go-cache library to provide an in-memory key-value store. type Cache struct { - c *cache.Cache + c *cache.Cache + expiration time.Duration + purgeInterval time.Duration } -// New constructs a new in-memory cache. -func New() *Cache { - c := cache.New(expirationInterval, purgeInterval) - return &Cache{c: c} +// CacheOption defines a function type used for configuring a Cache. +type CacheOption func(*Cache) + +// WithExpirationInterval returns a CacheOption to set the expiration interval of cache items. +// The interval determines the duration a cached item remains in the cache before it is expired. +func WithExpirationInterval(interval time.Duration) CacheOption { + return func(c *Cache) { c.expiration = interval } +} + +// WithPurgeInterval returns a CacheOption to set the interval at which the cache purges expired items. +// Regular purging helps in freeing up memory by removing stale entries. 
+func WithPurgeInterval(interval time.Duration) CacheOption { + return func(c *Cache) { c.purgeInterval = interval } +} + +// New constructs a new in-memory cache instance with optional configurations. +// By default, it sets the expiration and purge intervals to 12 and 13 hours, respectively. +// These defaults can be overridden using the functional options: WithExpirationInterval and WithPurgeInterval. +func New(opts ...CacheOption) *Cache { + instance := &Cache{expiration: defaultExpirationInterval, purgeInterval: defaultPurgeInterval} + for _, opt := range opts { + opt(instance) + } + + instance.c = cache.New(instance.expiration, instance.purgeInterval) + return instance } // NewWithData constructs a new in-memory cache with existing data. -func NewWithData(ctx context.Context, data []string) *Cache { +// It also accepts CacheOption parameters to override default configuration values. +func NewWithData(ctx context.Context, data []string, opts ...CacheOption) *Cache { ctx.Logger().V(3).Info("Loading cache", "num-items", len(data)) + instance := &Cache{expiration: defaultExpirationInterval, purgeInterval: defaultPurgeInterval} + for _, opt := range opts { + opt(instance) + } + + // Convert data slice to map required by go-cache. items := make(map[string]cache.Item, len(data)) for _, d := range data { items[d] = cache.Item{Object: d, Expiration: int64(defaultExpiration)} } - c := cache.NewFrom(expirationInterval, purgeInterval, items) - return &Cache{c: c} + instance.c = cache.NewFrom(instance.expiration, instance.purgeInterval, items) + return instance } // Set adds a key-value pair to the cache. 
From 9c016d70e7d80c1114beca0eff51c5232fe2ae4b Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sun, 7 Jan 2024 14:54:10 -0800 Subject: [PATCH 02/29] use any for value type --- pkg/cache/cache.go | 6 +++--- pkg/cache/memory/memory.go | 16 ++++++---------- pkg/sources/gcs/gcs.go | 2 +- pkg/sources/github/github.go | 10 +++++++++- 4 files changed, 19 insertions(+), 15 deletions(-) diff --git a/pkg/cache/cache.go b/pkg/cache/cache.go index 65a0bf2a88b4..da7156ac71f1 100644 --- a/pkg/cache/cache.go +++ b/pkg/cache/cache.go @@ -4,9 +4,9 @@ package cache // Cache is used to store key/value pairs. type Cache interface { // Set stores the given key/value pair. - Set(string, string) + Set(string, any) // Get returns the value for the given key and a boolean indicating if the key was found. - Get(string) (string, bool) + Get(string) (any, bool) // Exists returns true if the given key exists in the cache. Exists(string) bool // Delete the given key from the cache. @@ -18,7 +18,7 @@ type Cache interface { // Keys returns all keys in the cache. Keys() []string // Values returns all values in the cache. - Values() []string + Values() []any // Contents returns all keys in the cache encoded as a string. Contents() string } diff --git a/pkg/cache/memory/memory.go b/pkg/cache/memory/memory.go index 5be629109894..8cdd534aff52 100644 --- a/pkg/cache/memory/memory.go +++ b/pkg/cache/memory/memory.go @@ -71,17 +71,13 @@ func NewWithData(ctx context.Context, data []string, opts ...CacheOption) *Cache } // Set adds a key-value pair to the cache. -func (c *Cache) Set(key, value string) { +func (c *Cache) Set(key string, value any) { c.c.Set(key, value, defaultExpiration) } // Get returns the value for the given key. -func (c *Cache) Get(key string) (string, bool) { - res, ok := c.c.Get(key) - if !ok { - return "", ok - } - return res.(string), ok +func (c *Cache) Get(key string) (any, bool) { + return c.c.Get(key) } // Exists returns true if the given key exists in the cache. 
@@ -116,11 +112,11 @@ func (c *Cache) Keys() []string { } // Values returns all values in the cache. -func (c *Cache) Values() []string { +func (c *Cache) Values() []any { items := c.c.Items() - res := make([]string, 0, len(items)) + res := make([]any, 0, len(items)) for _, v := range items { - res = append(res, v.Object.(string)) + res = append(res, v.Object) } return res } diff --git a/pkg/sources/gcs/gcs.go b/pkg/sources/gcs/gcs.go index 3141ac47ae32..6894642e197a 100644 --- a/pkg/sources/gcs/gcs.go +++ b/pkg/sources/gcs/gcs.go @@ -97,7 +97,7 @@ func newPersistableCache(increment int, cache cache.Cache, p *sources.Progress) // Set overrides the cache Set method of the cache to enable the persistence // of the cache contents the Progress of the source at given increments. -func (c *persistableCache) Set(key, val string) { +func (c *persistableCache) Set(key string, val any) { c.Cache.Set(key, val) if ok, contents := c.shouldPersist(); ok { c.Progress.EncodedResumeInfo = contents diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index 334d32f6d854..8ad38954f33c 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -472,7 +472,15 @@ func (s *Source) enumerate(ctx context.Context, apiEndpoint string) (*github.Cli return nil, errors.Errorf("Invalid configuration given for source. 
Name: %s, Type: %s", s.name, s.Type()) } - s.repos = s.filteredRepoCache.Values() + s.repos = make([]string, 0, s.filteredRepoCache.Count()) + for _, repo := range s.filteredRepoCache.Values() { + r, ok := repo.(string) + if !ok { + ctx.Logger().Error(fmt.Errorf("type assertion failed"), "repo not found in cache", "repo", repo) + continue + } + s.repos = append(s.repos, r) + } githubReposEnumerated.WithLabelValues(s.name).Set(float64(len(s.repos))) s.log.Info("Completed enumeration", "num_repos", len(s.repos), "num_orgs", s.orgsCache.Count(), "num_members", len(s.memberCache)) From e6685c741b9fcb94f760ccdf2d20288cbffcdb07 Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sun, 7 Jan 2024 14:57:06 -0800 Subject: [PATCH 03/29] fix test --- pkg/cache/memory/memory_test.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pkg/cache/memory/memory_test.go b/pkg/cache/memory/memory_test.go index e1df315c8cf7..e9adf7b12721 100644 --- a/pkg/cache/memory/memory_test.go +++ b/pkg/cache/memory/memory_test.go @@ -60,7 +60,10 @@ func TestCache(t *testing.T) { } // Test getting only the values. - vals := c.Values() + vals := make([]string, 0, c.Count()) + for _, v := range c.Values() { + vals = append(vals, v.(string)) + } sort.Strings(vals) sort.Strings(values) if !cmp.Equal(values, vals) { From 6a4eabde51ee9ed0ec0810c7e0a10c2e8a563caf Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sun, 7 Jan 2024 15:15:11 -0800 Subject: [PATCH 04/29] fix test --- pkg/cache/memory/memory_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/cache/memory/memory_test.go b/pkg/cache/memory/memory_test.go index e9adf7b12721..76ec72884dcd 100644 --- a/pkg/cache/memory/memory_test.go +++ b/pkg/cache/memory/memory_test.go @@ -34,7 +34,7 @@ func TestCache(t *testing.T) { // Test delete. 
c.Delete("key1") v, ok = c.Get("key1") - if ok || v != "" { + if ok || v != nil { t.Fatalf("Unexpected value for key1 after delete: %v, %v", v, ok) } @@ -42,7 +42,7 @@ func TestCache(t *testing.T) { c.Set("key10", "key10") c.Clear() v, ok = c.Get("key10") - if ok || v != "" { + if ok || v != nil { t.Fatalf("Unexpected value for key10 after clear: %v, %v", v, ok) } From 5dc03be181632ce75c358efcbd9e4dad07c2019c Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sun, 7 Jan 2024 14:39:29 -0800 Subject: [PATCH 05/29] cache results to prevent multiple network calls using the same creds --- pkg/detectors/aws/aws.go | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/pkg/detectors/aws/aws.go b/pkg/detectors/aws/aws.go index b29cd83802df..da290df050fc 100644 --- a/pkg/detectors/aws/aws.go +++ b/pkg/detectors/aws/aws.go @@ -9,9 +9,12 @@ import ( "fmt" "net/http" "regexp" + "strconv" "strings" "time" + "github.com/trufflesecurity/trufflehog/v3/pkg/cache" + "github.com/trufflesecurity/trufflehog/v3/pkg/cache/memory" "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" @@ -20,6 +23,8 @@ import ( type scanner struct { verificationClient *http.Client skipIDs map[string]struct{} + + credsCache cache.Cache } // resourceTypes derived from: https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_identifiers.html#identifiers-unique-ids @@ -47,6 +52,11 @@ func New(opts ...func(*scanner)) *scanner { opt(scanner) } + scanner.credsCache = memory.New( + memory.WithExpirationInterval(1*time.Hour), + memory.WithPurgeInterval(2*time.Hour), + ) + return scanner } @@ -126,16 +136,24 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result } resSecretMatch := strings.TrimSpace(secretMatch[1]) + rawV2 := resIDMatch + resSecretMatch + s1 := detectors.Result{ DetectorType: detectorspb.DetectorType_AWS, Raw: 
[]byte(resIDMatch), Redacted: resIDMatch, - RawV2: []byte(resIDMatch + resSecretMatch), + RawV2: []byte(rawV2), ExtraData: map[string]string{ "resource_type": resourceTypes[idMatch[2]], }, } + if isVerified, ok := s.credsCache.Get(rawV2); ok { + s1.Verified = isVerified == "true" + results = append(results, s1) + continue + } + if verify { isVerified, extraData, verificationErr := s.verifyMatch(ctx, resIDMatch, resSecretMatch, true) s1.Verified = isVerified @@ -153,6 +171,7 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result } } + s.credsCache.Set(rawV2, strconv.FormatBool(s1.Verified)) if !s1.Verified { // Unverified results that contain common test words are probably not secrets if detectors.IsKnownFalsePositive(resSecretMatch, detectors.DefaultFalsePositives, true) { From 82348f94088fe6f4484776abc3b6a0125539ebe7 Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sun, 7 Jan 2024 15:31:13 -0800 Subject: [PATCH 06/29] use custom value --- pkg/detectors/aws/aws.go | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/pkg/detectors/aws/aws.go b/pkg/detectors/aws/aws.go index da290df050fc..f4aa6ed3d16d 100644 --- a/pkg/detectors/aws/aws.go +++ b/pkg/detectors/aws/aws.go @@ -9,7 +9,6 @@ import ( "fmt" "net/http" "regexp" - "strconv" "strings" "time" @@ -111,6 +110,15 @@ func GetHMAC(key []byte, data []byte) []byte { return hasher.Sum(nil) } +// cacheItem represents an item stored in the cache, encompassing the outcome of a verification process. +// It includes the verification result, ExtraData, and any VerificationErrors encountered during verification. +// This struct facilitates the reconstruction of detectors.Result with values for previously verified credentials. +type cacheItem struct { + extra map[string]string + verificationErr error + verified bool +} + // FromData will find and optionally verify AWS secrets in a given set of bytes. 
func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { dataStr := string(data) @@ -148,15 +156,25 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result }, } - if isVerified, ok := s.credsCache.Get(rawV2); ok { - s1.Verified = isVerified == "true" + if val, ok := s.credsCache.Get(rawV2); ok { + item, ok := val.(cacheItem) + if !ok { + continue + } + s1.Verified = item.verified + s1.ExtraData = item.extra + if item.verificationErr != nil { + s1.SetVerificationError(item.verificationErr, resSecretMatch) + } results = append(results, s1) continue } + var cacheItem cacheItem if verify { isVerified, extraData, verificationErr := s.verifyMatch(ctx, resIDMatch, resSecretMatch, true) s1.Verified = isVerified + cacheItem.verified = isVerified // It'd be good to log when calculated account value does not match // the account value from verification. Should only be edge cases at most. // if extraData["account"] != s1.ExtraData["account"] && extraData["account"] != "" {//log here} @@ -166,12 +184,15 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result for k, v := range extraData { s1.ExtraData[k] = v } + cacheItem.extra = s1.ExtraData if verificationErr != nil { s1.SetVerificationError(verificationErr, resSecretMatch) + cacheItem.verificationErr = verificationErr } } - s.credsCache.Set(rawV2, strconv.FormatBool(s1.Verified)) + // Cache the result. 
+ s.credsCache.Set(rawV2, cacheItem) if !s1.Verified { // Unverified results that contain common test words are probably not secrets if detectors.IsKnownFalsePositive(resSecretMatch, detectors.DefaultFalsePositives, true) { From 39b1642c0067aba426c4607713c7122ffb37bc88 Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sun, 7 Jan 2024 15:41:39 -0800 Subject: [PATCH 07/29] rename field --- pkg/detectors/aws/aws.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/detectors/aws/aws.go b/pkg/detectors/aws/aws.go index f4aa6ed3d16d..c12cd592260d 100644 --- a/pkg/detectors/aws/aws.go +++ b/pkg/detectors/aws/aws.go @@ -116,7 +116,7 @@ func GetHMAC(key []byte, data []byte) []byte { type cacheItem struct { extra map[string]string verificationErr error - verified bool + isVerified bool } // FromData will find and optionally verify AWS secrets in a given set of bytes. @@ -161,7 +161,7 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result if !ok { continue } - s1.Verified = item.verified + s1.Verified = item.isVerified s1.ExtraData = item.extra if item.verificationErr != nil { s1.SetVerificationError(item.verificationErr, resSecretMatch) @@ -174,7 +174,7 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result if verify { isVerified, extraData, verificationErr := s.verifyMatch(ctx, resIDMatch, resSecretMatch, true) s1.Verified = isVerified - cacheItem.verified = isVerified + cacheItem.isVerified = isVerified // It'd be good to log when calculated account value does not match // the account value from verification. Should only be edge cases at most. 
// if extraData["account"] != s1.ExtraData["account"] && extraData["account"] != "" {//log here} From 4af9af3957f99b6912fdf6d2e0a576e21884a044 Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sun, 7 Jan 2024 15:51:06 -0800 Subject: [PATCH 08/29] use ptr for the cacheItem --- pkg/detectors/aws/aws.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/detectors/aws/aws.go b/pkg/detectors/aws/aws.go index c12cd592260d..2340d80a72dc 100644 --- a/pkg/detectors/aws/aws.go +++ b/pkg/detectors/aws/aws.go @@ -192,7 +192,7 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result } // Cache the result. - s.credsCache.Set(rawV2, cacheItem) + s.credsCache.Set(rawV2, &cacheItem) if !s1.Verified { // Unverified results that contain common test words are probably not secrets if detectors.IsKnownFalsePositive(resSecretMatch, detectors.DefaultFalsePositives, true) { From e4b465d66b547e5f092103cfadd27cf9dcd25a04 Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sun, 7 Jan 2024 16:01:41 -0800 Subject: [PATCH 09/29] use constructor --- pkg/detectors/aws/aws.go | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/pkg/detectors/aws/aws.go b/pkg/detectors/aws/aws.go index 2340d80a72dc..1eb058a7d7b0 100644 --- a/pkg/detectors/aws/aws.go +++ b/pkg/detectors/aws/aws.go @@ -119,6 +119,14 @@ type cacheItem struct { isVerified bool } +func newCacheItem(isVerified bool, verificationErr error, extra map[string]string) *cacheItem { + item := &cacheItem{verificationErr: verificationErr, isVerified: isVerified} + if extra != nil { + item.extra = extra + } + return item +} + // FromData will find and optionally verify AWS secrets in a given set of bytes. 
func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { dataStr := string(data) @@ -170,11 +178,9 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result continue } - var cacheItem cacheItem if verify { isVerified, extraData, verificationErr := s.verifyMatch(ctx, resIDMatch, resSecretMatch, true) s1.Verified = isVerified - cacheItem.isVerified = isVerified // It'd be good to log when calculated account value does not match // the account value from verification. Should only be edge cases at most. // if extraData["account"] != s1.ExtraData["account"] && extraData["account"] != "" {//log here} @@ -184,15 +190,13 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result for k, v := range extraData { s1.ExtraData[k] = v } - cacheItem.extra = s1.ExtraData if verificationErr != nil { s1.SetVerificationError(verificationErr, resSecretMatch) - cacheItem.verificationErr = verificationErr } } // Cache the result. 
- s.credsCache.Set(rawV2, &cacheItem) + s.credsCache.Set(rawV2, newCacheItem(s1.Verified, s1.VerificationError(), s1.ExtraData)) if !s1.Verified { // Unverified results that contain common test words are probably not secrets if detectors.IsKnownFalsePositive(resSecretMatch, detectors.DefaultFalsePositives, true) { From dd9b6ea5b842f41e3c27a79f8dc70697e902478e Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sun, 7 Jan 2024 16:03:39 -0800 Subject: [PATCH 10/29] remove nil check --- pkg/detectors/aws/aws.go | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/pkg/detectors/aws/aws.go b/pkg/detectors/aws/aws.go index 1eb058a7d7b0..b2a88044d3f0 100644 --- a/pkg/detectors/aws/aws.go +++ b/pkg/detectors/aws/aws.go @@ -120,11 +120,7 @@ type cacheItem struct { } func newCacheItem(isVerified bool, verificationErr error, extra map[string]string) *cacheItem { - item := &cacheItem{verificationErr: verificationErr, isVerified: isVerified} - if extra != nil { - item.extra = extra - } - return item + return &cacheItem{verificationErr: verificationErr, isVerified: isVerified, extra: extra} } // FromData will find and optionally verify AWS secrets in a given set of bytes. 
@@ -171,9 +167,7 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result } s1.Verified = item.isVerified s1.ExtraData = item.extra - if item.verificationErr != nil { - s1.SetVerificationError(item.verificationErr, resSecretMatch) - } + s1.SetVerificationError(item.verificationErr, resSecretMatch) results = append(results, s1) continue } From f45bb31cc4d98057a6533e47cc8223e77254e38f Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sun, 7 Jan 2024 16:07:15 -0800 Subject: [PATCH 11/29] add method to popualte result from the cache item --- pkg/detectors/aws/aws.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pkg/detectors/aws/aws.go b/pkg/detectors/aws/aws.go index b2a88044d3f0..e3de838e0e3e 100644 --- a/pkg/detectors/aws/aws.go +++ b/pkg/detectors/aws/aws.go @@ -123,6 +123,13 @@ func newCacheItem(isVerified bool, verificationErr error, extra map[string]strin return &cacheItem{verificationErr: verificationErr, isVerified: isVerified, extra: extra} } +// populateResult populates the given detectors.Result with the values from the cacheItem. +func (c *cacheItem) populateResult(result *detectors.Result) { + result.Verified = c.isVerified + result.ExtraData = c.extra + result.SetVerificationError(c.verificationErr) +} + // FromData will find and optionally verify AWS secrets in a given set of bytes. 
func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { dataStr := string(data) @@ -165,9 +172,7 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result if !ok { continue } - s1.Verified = item.isVerified - s1.ExtraData = item.extra - s1.SetVerificationError(item.verificationErr, resSecretMatch) + item.populateResult(&s1) results = append(results, s1) continue } From f286c33b06aff0eca01fc7d629c8cafbb11ae91f Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sun, 7 Jan 2024 16:10:08 -0800 Subject: [PATCH 12/29] update comment --- pkg/detectors/aws/aws.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/detectors/aws/aws.go b/pkg/detectors/aws/aws.go index e3de838e0e3e..8a16bb5069bd 100644 --- a/pkg/detectors/aws/aws.go +++ b/pkg/detectors/aws/aws.go @@ -111,16 +111,16 @@ func GetHMAC(key []byte, data []byte) []byte { } // cacheItem represents an item stored in the cache, encompassing the outcome of a verification process. -// It includes the verification result, ExtraData, and any VerificationErrors encountered during verification. -// This struct facilitates the reconstruction of detectors.Result with values for previously verified credentials. +// It includes the verification result, ExtraData, and VerificationErrors encountered during verification. +// This facilitates the reconstruction of detectors.Result with values for previously verified creds. type cacheItem struct { - extra map[string]string - verificationErr error isVerified bool + verificationErr error + extra map[string]string } func newCacheItem(isVerified bool, verificationErr error, extra map[string]string) *cacheItem { - return &cacheItem{verificationErr: verificationErr, isVerified: isVerified, extra: extra} + return &cacheItem{isVerified, verificationErr, extra} } // populateResult populates the given detectors.Result with the values from the cacheItem. 
From c4162e261e0ada3bfcb31cc3aeb6ffb0cba1ab8b Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sun, 7 Jan 2024 16:13:09 -0800 Subject: [PATCH 13/29] missed a ptr --- pkg/detectors/aws/aws.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/detectors/aws/aws.go b/pkg/detectors/aws/aws.go index 8a16bb5069bd..56e51775d410 100644 --- a/pkg/detectors/aws/aws.go +++ b/pkg/detectors/aws/aws.go @@ -168,7 +168,7 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result } if val, ok := s.credsCache.Get(rawV2); ok { - item, ok := val.(cacheItem) + item, ok := val.(*cacheItem) if !ok { continue } From 318eebbbaefa74bf45b703b1a22eb2081c89652e Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sun, 7 Jan 2024 17:05:26 -0800 Subject: [PATCH 14/29] add test --- pkg/detectors/aws/aws_test.go | 55 +++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/pkg/detectors/aws/aws_test.go b/pkg/detectors/aws/aws_test.go index 8ab69552e0e4..a6da72a3fa4f 100644 --- a/pkg/detectors/aws/aws_test.go +++ b/pkg/detectors/aws/aws_test.go @@ -7,11 +7,14 @@ import ( "context" "crypto/sha256" "fmt" + "net/http" "testing" "time" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" + + "github.com/trufflesecurity/trufflehog/v3/pkg/cache/memory" "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" @@ -326,6 +329,58 @@ func TestAWS_FromChunk(t *testing.T) { } } +// TestAWSFromDataCacheDuplicateCreds tests that duplicate credentials are not verified against the AWS API +// multiple times. 
+func TestAWSFromDataCacheDuplicateCreds(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors4") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("AWS") + id := testSecrets.MustGetField("AWS_ID") + + // Mock HTTP client to intercept AWS API requests and count them. + apiCallCount := 0 + mockClient := &http.Client{ + Transport: roundTripperFunc(func(req *http.Request) (*http.Response, error) { + apiCallCount++ + return &http.Response{StatusCode: http.StatusOK}, nil + }), + } + + s := scanner{verificationClient: mockClient, credsCache: memory.New()} + + testData := []byte(fmt.Sprintf("You can find a aws secret %s within aws %s", secret, id)) + + // First call - expect cache to be empty and an API call to be made. + _, err = s.FromData(context.Background(), true, testData) + if err != nil { + t.Fatalf("Error processing data: %s", err) + } + if apiCallCount != 1 { + t.Fatalf("Expected 1 API call, got %d", apiCallCount) + } + + // Second call with the same data - expect cache to be used and no additional API calls. + _, err = s.FromData(context.Background(), true, testData) + if err != nil { + t.Fatalf("Error processing data: %s", err) + } + if apiCallCount != 1 { + t.Fatalf("Cache did not work as expected, API call count: %d", apiCallCount) + } +} + +// roundTripperFunc type is an adapter to allow the use of ordinary functions as HTTP round trippers. 
+type roundTripperFunc func(*http.Request) (*http.Response, error) + +func (f roundTripperFunc) RoundTrip(r *http.Request) (*http.Response, error) { + return f(r) +} + func BenchmarkFromData(benchmark *testing.B) { ctx := context.Background() s := scanner{} From c1a330df6ae994531509a91034a503c7aac088ea Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sun, 7 Jan 2024 17:06:04 -0800 Subject: [PATCH 15/29] rename --- pkg/detectors/aws/aws_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/detectors/aws/aws_test.go b/pkg/detectors/aws/aws_test.go index a6da72a3fa4f..a01841eda3a4 100644 --- a/pkg/detectors/aws/aws_test.go +++ b/pkg/detectors/aws/aws_test.go @@ -351,12 +351,12 @@ func TestAWSFromDataCacheDuplicateCreds(t *testing.T) { }), } - s := scanner{verificationClient: mockClient, credsCache: memory.New()} + detector := scanner{verificationClient: mockClient, credsCache: memory.New()} testData := []byte(fmt.Sprintf("You can find a aws secret %s within aws %s", secret, id)) // First call - expect cache to be empty and an API call to be made. - _, err = s.FromData(context.Background(), true, testData) + _, err = detector.FromData(context.Background(), true, testData) if err != nil { t.Fatalf("Error processing data: %s", err) } @@ -365,7 +365,7 @@ func TestAWSFromDataCacheDuplicateCreds(t *testing.T) { } // Second call with the same data - expect cache to be used and no additional API calls. 
- _, err = s.FromData(context.Background(), true, testData) + _, err = detector.FromData(context.Background(), true, testData) if err != nil { t.Fatalf("Error processing data: %s", err) } From 8ceef69e019e78961cb060368ef7948444746e79 Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Mon, 8 Jan 2024 09:15:13 -0800 Subject: [PATCH 16/29] address comments --- pkg/cache/memory/memory.go | 12 ++++++++++-- pkg/cache/memory/memory_test.go | 3 ++- pkg/sources/gcs/gcs.go | 8 +++++++- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/pkg/cache/memory/memory.go b/pkg/cache/memory/memory.go index 8cdd534aff52..d2c46f702362 100644 --- a/pkg/cache/memory/memory.go +++ b/pkg/cache/memory/memory.go @@ -50,9 +50,17 @@ func New(opts ...CacheOption) *Cache { return instance } +// CacheEntry represents a single entry in the cache, consisting of a key and its corresponding value. +type CacheEntry struct { + // Key is the unique identifier for the entry. + Key string + // Value is the data stored in the entry. + Value any +} + // NewWithData constructs a new in-memory cache with existing data. // It also accepts CacheOption parameters to override default configuration values. -func NewWithData(ctx context.Context, data []string, opts ...CacheOption) *Cache { +func NewWithData(ctx context.Context, data []CacheEntry, opts ...CacheOption) *Cache { ctx.Logger().V(3).Info("Loading cache", "num-items", len(data)) instance := &Cache{expiration: defaultExpirationInterval, purgeInterval: defaultPurgeInterval} @@ -63,7 +71,7 @@ func NewWithData(ctx context.Context, data []string, opts ...CacheOption) *Cache // Convert data slice to map required by go-cache. 
items := make(map[string]cache.Item, len(data)) for _, d := range data { - items[d] = cache.Item{Object: d, Expiration: int64(defaultExpiration)} + items[d.Key] = cache.Item{Object: d.Value, Expiration: int64(defaultExpiration)} } instance.c = cache.NewFrom(instance.expiration, instance.purgeInterval, items) diff --git a/pkg/cache/memory/memory_test.go b/pkg/cache/memory/memory_test.go index 76ec72884dcd..baac4ce9292f 100644 --- a/pkg/cache/memory/memory_test.go +++ b/pkg/cache/memory/memory_test.go @@ -85,7 +85,8 @@ func TestCache(t *testing.T) { } func TestCache_NewWithData(t *testing.T) { - c := NewWithData(logContext.Background(), []string{"key1", "key2", "key3"}) + data := []CacheEntry{{"key1", "value1"}, {"key2", "value2"}, {"key3", "value3"}} + c := NewWithData(logContext.Background(), data) // Test the count. if c.Count() != 3 { diff --git a/pkg/sources/gcs/gcs.go b/pkg/sources/gcs/gcs.go index 6894642e197a..a08b3b375ba8 100644 --- a/pkg/sources/gcs/gcs.go +++ b/pkg/sources/gcs/gcs.go @@ -297,7 +297,13 @@ func (s *Source) Chunks(ctx context.Context, chunksChan chan *sources.Chunk, _ . 
func (s *Source) setupCache(ctx context.Context) *persistableCache { var c cache.Cache if s.Progress.EncodedResumeInfo != "" { - c = memory.NewWithData(ctx, strings.Split(s.Progress.EncodedResumeInfo, ",")) + keys := strings.Split(s.Progress.EncodedResumeInfo, ",") + entries := make([]memory.CacheEntry, len(keys)) + for i, val := range keys { + entries[i] = memory.CacheEntry{Key: val, Value: val} + } + + c = memory.NewWithData(ctx, entries) } else { c = memory.New() } From 5609ca3939e9126b91f87c3f92f700142d73c2df Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Mon, 8 Jan 2024 09:16:04 -0800 Subject: [PATCH 17/29] address --- pkg/sources/github/github.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index 8ad38954f33c..f0e03020c1f0 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -476,7 +476,7 @@ func (s *Source) enumerate(ctx context.Context, apiEndpoint string) (*github.Cli for _, repo := range s.filteredRepoCache.Values() { r, ok := repo.(string) if !ok { - ctx.Logger().Error(fmt.Errorf("type assertion failed"), "repo not found in cache", "repo", repo) + ctx.Logger().Error(fmt.Errorf("type assertion failed"), "unexpected value in cache", "repo", repo) continue } s.repos = append(s.repos, r) From 47916e3e6722675d14e6d707249e11af272b2a18 Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Mon, 8 Jan 2024 09:20:27 -0800 Subject: [PATCH 18/29] make new construct more clear --- pkg/cache/memory/memory.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pkg/cache/memory/memory.go b/pkg/cache/memory/memory.go index d2c46f702362..9b5da2621c03 100644 --- a/pkg/cache/memory/memory.go +++ b/pkg/cache/memory/memory.go @@ -41,13 +41,14 @@ func WithPurgeInterval(interval time.Duration) CacheOption { // By default, it sets the expiration and purge intervals to 12 and 13 hours, respectively. 
// These defaults can be overridden using the functional options: WithExpirationInterval and WithPurgeInterval. func New(opts ...CacheOption) *Cache { - instance := &Cache{expiration: defaultExpirationInterval, purgeInterval: defaultPurgeInterval} + configurableCache := &Cache{expiration: defaultExpirationInterval, purgeInterval: defaultPurgeInterval} for _, opt := range opts { - opt(instance) + opt(configurableCache) } - instance.c = cache.New(instance.expiration, instance.purgeInterval) - return instance + // The underlying cache is initialized with the configured expiration and purge intervals. + configurableCache.c = cache.New(configurableCache.expiration, configurableCache.purgeInterval) + return configurableCache } // CacheEntry represents a single entry in the cache, consisting of a key and its corresponding value. From 32b75c2a812968e4b24cf6c86f6689d43e828966 Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sun, 7 Jan 2024 14:39:29 -0800 Subject: [PATCH 19/29] cache results to prevent multiple network calls using the same creds --- pkg/detectors/aws/aws.go | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/pkg/detectors/aws/aws.go b/pkg/detectors/aws/aws.go index b29cd83802df..da290df050fc 100644 --- a/pkg/detectors/aws/aws.go +++ b/pkg/detectors/aws/aws.go @@ -9,9 +9,12 @@ import ( "fmt" "net/http" "regexp" + "strconv" "strings" "time" + "github.com/trufflesecurity/trufflehog/v3/pkg/cache" + "github.com/trufflesecurity/trufflehog/v3/pkg/cache/memory" "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" @@ -20,6 +23,8 @@ import ( type scanner struct { verificationClient *http.Client skipIDs map[string]struct{} + + credsCache cache.Cache } // resourceTypes derived from: https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_identifiers.html#identifiers-unique-ids @@ -47,6 +52,11 @@ func New(opts 
...func(*scanner)) *scanner { opt(scanner) } + scanner.credsCache = memory.New( + memory.WithExpirationInterval(1*time.Hour), + memory.WithPurgeInterval(2*time.Hour), + ) + return scanner } @@ -126,16 +136,24 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result } resSecretMatch := strings.TrimSpace(secretMatch[1]) + rawV2 := resIDMatch + resSecretMatch + s1 := detectors.Result{ DetectorType: detectorspb.DetectorType_AWS, Raw: []byte(resIDMatch), Redacted: resIDMatch, - RawV2: []byte(resIDMatch + resSecretMatch), + RawV2: []byte(rawV2), ExtraData: map[string]string{ "resource_type": resourceTypes[idMatch[2]], }, } + if isVerified, ok := s.credsCache.Get(rawV2); ok { + s1.Verified = isVerified == "true" + results = append(results, s1) + continue + } + if verify { isVerified, extraData, verificationErr := s.verifyMatch(ctx, resIDMatch, resSecretMatch, true) s1.Verified = isVerified @@ -153,6 +171,7 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result } } + s.credsCache.Set(rawV2, strconv.FormatBool(s1.Verified)) if !s1.Verified { // Unverified results that contain common test words are probably not secrets if detectors.IsKnownFalsePositive(resSecretMatch, detectors.DefaultFalsePositives, true) { From 5605ad03292a09066fd41d5eb433761212898dab Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sun, 7 Jan 2024 15:31:13 -0800 Subject: [PATCH 20/29] use custom value --- pkg/detectors/aws/aws.go | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/pkg/detectors/aws/aws.go b/pkg/detectors/aws/aws.go index da290df050fc..f4aa6ed3d16d 100644 --- a/pkg/detectors/aws/aws.go +++ b/pkg/detectors/aws/aws.go @@ -9,7 +9,6 @@ import ( "fmt" "net/http" "regexp" - "strconv" "strings" "time" @@ -111,6 +110,15 @@ func GetHMAC(key []byte, data []byte) []byte { return hasher.Sum(nil) } +// cacheItem represents an item stored in the cache, encompassing the outcome of a verification process. 
+// It includes the verification result, ExtraData, and any VerificationErrors encountered during verification. +// This struct facilitates the reconstruction of detectors.Result with values for previously verified credentials. +type cacheItem struct { + extra map[string]string + verificationErr error + verified bool +} + // FromData will find and optionally verify AWS secrets in a given set of bytes. func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { dataStr := string(data) @@ -148,15 +156,25 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result }, } - if isVerified, ok := s.credsCache.Get(rawV2); ok { - s1.Verified = isVerified == "true" + if val, ok := s.credsCache.Get(rawV2); ok { + item, ok := val.(cacheItem) + if !ok { + continue + } + s1.Verified = item.verified + s1.ExtraData = item.extra + if item.verificationErr != nil { + s1.SetVerificationError(item.verificationErr, resSecretMatch) + } results = append(results, s1) continue } + var cacheItem cacheItem if verify { isVerified, extraData, verificationErr := s.verifyMatch(ctx, resIDMatch, resSecretMatch, true) s1.Verified = isVerified + cacheItem.verified = isVerified // It'd be good to log when calculated account value does not match // the account value from verification. Should only be edge cases at most. // if extraData["account"] != s1.ExtraData["account"] && extraData["account"] != "" {//log here} @@ -166,12 +184,15 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result for k, v := range extraData { s1.ExtraData[k] = v } + cacheItem.extra = s1.ExtraData if verificationErr != nil { s1.SetVerificationError(verificationErr, resSecretMatch) + cacheItem.verificationErr = verificationErr } } - s.credsCache.Set(rawV2, strconv.FormatBool(s1.Verified)) + // Cache the result. 
+ s.credsCache.Set(rawV2, cacheItem) if !s1.Verified { // Unverified results that contain common test words are probably not secrets if detectors.IsKnownFalsePositive(resSecretMatch, detectors.DefaultFalsePositives, true) { From 6c6d58e437ff89b1393dde5e85765144b34fca74 Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sun, 7 Jan 2024 15:41:39 -0800 Subject: [PATCH 21/29] rename field --- pkg/detectors/aws/aws.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/detectors/aws/aws.go b/pkg/detectors/aws/aws.go index f4aa6ed3d16d..c12cd592260d 100644 --- a/pkg/detectors/aws/aws.go +++ b/pkg/detectors/aws/aws.go @@ -116,7 +116,7 @@ func GetHMAC(key []byte, data []byte) []byte { type cacheItem struct { extra map[string]string verificationErr error - verified bool + isVerified bool } // FromData will find and optionally verify AWS secrets in a given set of bytes. @@ -161,7 +161,7 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result if !ok { continue } - s1.Verified = item.verified + s1.Verified = item.isVerified s1.ExtraData = item.extra if item.verificationErr != nil { s1.SetVerificationError(item.verificationErr, resSecretMatch) @@ -174,7 +174,7 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result if verify { isVerified, extraData, verificationErr := s.verifyMatch(ctx, resIDMatch, resSecretMatch, true) s1.Verified = isVerified - cacheItem.verified = isVerified + cacheItem.isVerified = isVerified // It'd be good to log when calculated account value does not match // the account value from verification. Should only be edge cases at most. 
// if extraData["account"] != s1.ExtraData["account"] && extraData["account"] != "" {//log here} From 72c97223d1666399438c45c896e083f7135f3e4e Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sun, 7 Jan 2024 15:51:06 -0800 Subject: [PATCH 22/29] use ptr for the cacheItem --- pkg/detectors/aws/aws.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/detectors/aws/aws.go b/pkg/detectors/aws/aws.go index c12cd592260d..2340d80a72dc 100644 --- a/pkg/detectors/aws/aws.go +++ b/pkg/detectors/aws/aws.go @@ -192,7 +192,7 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result } // Cache the result. - s.credsCache.Set(rawV2, cacheItem) + s.credsCache.Set(rawV2, &cacheItem) if !s1.Verified { // Unverified results that contain common test words are probably not secrets if detectors.IsKnownFalsePositive(resSecretMatch, detectors.DefaultFalsePositives, true) { From eee751fcebca7002133c108a410ce10449d2bb3c Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sun, 7 Jan 2024 16:01:41 -0800 Subject: [PATCH 23/29] use constructor --- pkg/detectors/aws/aws.go | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/pkg/detectors/aws/aws.go b/pkg/detectors/aws/aws.go index 2340d80a72dc..1eb058a7d7b0 100644 --- a/pkg/detectors/aws/aws.go +++ b/pkg/detectors/aws/aws.go @@ -119,6 +119,14 @@ type cacheItem struct { isVerified bool } +func newCacheItem(isVerified bool, verificationErr error, extra map[string]string) *cacheItem { + item := &cacheItem{verificationErr: verificationErr, isVerified: isVerified} + if extra != nil { + item.extra = extra + } + return item +} + // FromData will find and optionally verify AWS secrets in a given set of bytes. 
func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { dataStr := string(data) @@ -170,11 +178,9 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result continue } - var cacheItem cacheItem if verify { isVerified, extraData, verificationErr := s.verifyMatch(ctx, resIDMatch, resSecretMatch, true) s1.Verified = isVerified - cacheItem.isVerified = isVerified // It'd be good to log when calculated account value does not match // the account value from verification. Should only be edge cases at most. // if extraData["account"] != s1.ExtraData["account"] && extraData["account"] != "" {//log here} @@ -184,15 +190,13 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result for k, v := range extraData { s1.ExtraData[k] = v } - cacheItem.extra = s1.ExtraData if verificationErr != nil { s1.SetVerificationError(verificationErr, resSecretMatch) - cacheItem.verificationErr = verificationErr } } // Cache the result. 
- s.credsCache.Set(rawV2, &cacheItem) + s.credsCache.Set(rawV2, newCacheItem(s1.Verified, s1.VerificationError(), s1.ExtraData)) if !s1.Verified { // Unverified results that contain common test words are probably not secrets if detectors.IsKnownFalsePositive(resSecretMatch, detectors.DefaultFalsePositives, true) { From a08343f99199b5095f17a32d9af6bf235becd233 Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sun, 7 Jan 2024 16:03:39 -0800 Subject: [PATCH 24/29] remove nil check --- pkg/detectors/aws/aws.go | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/pkg/detectors/aws/aws.go b/pkg/detectors/aws/aws.go index 1eb058a7d7b0..b2a88044d3f0 100644 --- a/pkg/detectors/aws/aws.go +++ b/pkg/detectors/aws/aws.go @@ -120,11 +120,7 @@ type cacheItem struct { } func newCacheItem(isVerified bool, verificationErr error, extra map[string]string) *cacheItem { - item := &cacheItem{verificationErr: verificationErr, isVerified: isVerified} - if extra != nil { - item.extra = extra - } - return item + return &cacheItem{verificationErr: verificationErr, isVerified: isVerified, extra: extra} } // FromData will find and optionally verify AWS secrets in a given set of bytes. 
@@ -171,9 +167,7 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result } s1.Verified = item.isVerified s1.ExtraData = item.extra - if item.verificationErr != nil { - s1.SetVerificationError(item.verificationErr, resSecretMatch) - } + s1.SetVerificationError(item.verificationErr, resSecretMatch) results = append(results, s1) continue } From 0a4155b2dd393432555ecb89de2b05c27b177469 Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sun, 7 Jan 2024 16:07:15 -0800 Subject: [PATCH 25/29] add method to populate result from the cache item --- pkg/detectors/aws/aws.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pkg/detectors/aws/aws.go b/pkg/detectors/aws/aws.go index b2a88044d3f0..e3de838e0e3e 100644 --- a/pkg/detectors/aws/aws.go +++ b/pkg/detectors/aws/aws.go @@ -123,6 +123,13 @@ func newCacheItem(isVerified bool, verificationErr error, extra map[string]strin return &cacheItem{verificationErr: verificationErr, isVerified: isVerified, extra: extra} } +// populateResult populates the given detectors.Result with the values from the cacheItem. +func (c *cacheItem) populateResult(result *detectors.Result) { + result.Verified = c.isVerified + result.ExtraData = c.extra + result.SetVerificationError(c.verificationErr) +} + // FromData will find and optionally verify AWS secrets in a given set of bytes. 
func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { dataStr := string(data) @@ -165,9 +172,7 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result if !ok { continue } - s1.Verified = item.isVerified - s1.ExtraData = item.extra - s1.SetVerificationError(item.verificationErr, resSecretMatch) + item.populateResult(&s1) results = append(results, s1) continue } From dac2eb14090bd5d9d8377d3173227a1c6e8a2fa2 Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sun, 7 Jan 2024 16:10:08 -0800 Subject: [PATCH 26/29] update comment --- pkg/detectors/aws/aws.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/detectors/aws/aws.go b/pkg/detectors/aws/aws.go index e3de838e0e3e..8a16bb5069bd 100644 --- a/pkg/detectors/aws/aws.go +++ b/pkg/detectors/aws/aws.go @@ -111,16 +111,16 @@ func GetHMAC(key []byte, data []byte) []byte { } // cacheItem represents an item stored in the cache, encompassing the outcome of a verification process. -// It includes the verification result, ExtraData, and any VerificationErrors encountered during verification. -// This struct facilitates the reconstruction of detectors.Result with values for previously verified credentials. +// It includes the verification result, ExtraData, and VerificationErrors encountered during verification. +// This facilitates the reconstruction of detectors.Result with values for previously verified creds. type cacheItem struct { - extra map[string]string - verificationErr error isVerified bool + verificationErr error + extra map[string]string } func newCacheItem(isVerified bool, verificationErr error, extra map[string]string) *cacheItem { - return &cacheItem{verificationErr: verificationErr, isVerified: isVerified, extra: extra} + return &cacheItem{isVerified, verificationErr, extra} } // populateResult populates the given detectors.Result with the values from the cacheItem. 
From a8703a2a02a000687c47dbbd8289b55779a47512 Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sun, 7 Jan 2024 16:13:09 -0800 Subject: [PATCH 27/29] missed a ptr --- pkg/detectors/aws/aws.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/detectors/aws/aws.go b/pkg/detectors/aws/aws.go index 8a16bb5069bd..56e51775d410 100644 --- a/pkg/detectors/aws/aws.go +++ b/pkg/detectors/aws/aws.go @@ -168,7 +168,7 @@ func (s scanner) FromData(ctx context.Context, verify bool, data []byte) (result } if val, ok := s.credsCache.Get(rawV2); ok { - item, ok := val.(cacheItem) + item, ok := val.(*cacheItem) if !ok { continue } From 06f99da18635798bb3a52e4986a2ae70b6fcc113 Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sun, 7 Jan 2024 17:05:26 -0800 Subject: [PATCH 28/29] add test --- pkg/detectors/aws/aws_test.go | 55 +++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/pkg/detectors/aws/aws_test.go b/pkg/detectors/aws/aws_test.go index 8ab69552e0e4..a6da72a3fa4f 100644 --- a/pkg/detectors/aws/aws_test.go +++ b/pkg/detectors/aws/aws_test.go @@ -7,11 +7,14 @@ import ( "context" "crypto/sha256" "fmt" + "net/http" "testing" "time" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" + + "github.com/trufflesecurity/trufflehog/v3/pkg/cache/memory" "github.com/trufflesecurity/trufflehog/v3/pkg/common" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" @@ -326,6 +329,58 @@ func TestAWS_FromChunk(t *testing.T) { } } +// TestAWSFromDataCacheDuplicateCreds tests that duplicate credentials are not verified against the AWS API +// multiple times. 
+func TestAWSFromDataCacheDuplicateCreds(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors4") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("AWS") + id := testSecrets.MustGetField("AWS_ID") + + // Mock HTTP client to intercept AWS API requests and count them. + apiCallCount := 0 + mockClient := &http.Client{ + Transport: roundTripperFunc(func(req *http.Request) (*http.Response, error) { + apiCallCount++ + return &http.Response{StatusCode: http.StatusOK}, nil + }), + } + + s := scanner{verificationClient: mockClient, credsCache: memory.New()} + + testData := []byte(fmt.Sprintf("You can find a aws secret %s within aws %s", secret, id)) + + // First call - expect cache to be empty and an API call to be made. + _, err = s.FromData(context.Background(), true, testData) + if err != nil { + t.Fatalf("Error processing data: %s", err) + } + if apiCallCount != 1 { + t.Fatalf("Expected 1 API call, got %d", apiCallCount) + } + + // Second call with the same data - expect cache to be used and no additional API calls. + _, err = s.FromData(context.Background(), true, testData) + if err != nil { + t.Fatalf("Error processing data: %s", err) + } + if apiCallCount != 1 { + t.Fatalf("Cache did not work as expected, API call count: %d", apiCallCount) + } +} + +// roundTripperFunc type is an adapter to allow the use of ordinary functions as HTTP round trippers. 
+type roundTripperFunc func(*http.Request) (*http.Response, error) + +func (f roundTripperFunc) RoundTrip(r *http.Request) (*http.Response, error) { + return f(r) +} + func BenchmarkFromData(benchmark *testing.B) { ctx := context.Background() s := scanner{} From a586b5f08286697c52a3afbafcb610495e4368b2 Mon Sep 17 00:00:00 2001 From: Ahrav Dutta Date: Sun, 7 Jan 2024 17:06:04 -0800 Subject: [PATCH 29/29] rename --- pkg/detectors/aws/aws_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/detectors/aws/aws_test.go b/pkg/detectors/aws/aws_test.go index a6da72a3fa4f..a01841eda3a4 100644 --- a/pkg/detectors/aws/aws_test.go +++ b/pkg/detectors/aws/aws_test.go @@ -351,12 +351,12 @@ func TestAWSFromDataCacheDuplicateCreds(t *testing.T) { }), } - s := scanner{verificationClient: mockClient, credsCache: memory.New()} + detector := scanner{verificationClient: mockClient, credsCache: memory.New()} testData := []byte(fmt.Sprintf("You can find a aws secret %s within aws %s", secret, id)) // First call - expect cache to be empty and an API call to be made. - _, err = s.FromData(context.Background(), true, testData) + _, err = detector.FromData(context.Background(), true, testData) if err != nil { t.Fatalf("Error processing data: %s", err) } @@ -365,7 +365,7 @@ func TestAWSFromDataCacheDuplicateCreds(t *testing.T) { } // Second call with the same data - expect cache to be used and no additional API calls. - _, err = s.FromData(context.Background(), true, testData) + _, err = detector.FromData(context.Background(), true, testData) if err != nil { t.Fatalf("Error processing data: %s", err) }