Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(fs): use git commit hash as cache key for clean repositories #8278

Merged
merged 14 commits into from
Jan 27, 2025
Merged
59 changes: 56 additions & 3 deletions pkg/fanal/artifact/local/fs.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"strings"
"sync"

"github.com/go-git/go-git/v5"
"github.com/google/wire"
"github.com/opencontainers/go-digest"
"golang.org/x/xerrors"
Expand All @@ -20,6 +21,7 @@ import (
"github.com/aquasecurity/trivy/pkg/fanal/handler"
"github.com/aquasecurity/trivy/pkg/fanal/types"
"github.com/aquasecurity/trivy/pkg/fanal/walker"
"github.com/aquasecurity/trivy/pkg/log"
"github.com/aquasecurity/trivy/pkg/semaphore"
)

Expand All @@ -45,6 +47,7 @@ type Artifact struct {
handlerManager handler.Manager

artifactOption artifact.Option
commitHash string // only set when the git repository is clean
}

func NewArtifact(rootPath string, c cache.ArtifactCache, w Walker, opt artifact.Option) (artifact.Artifact, error) {
Expand All @@ -58,14 +61,55 @@ func NewArtifact(rootPath string, c cache.ArtifactCache, w Walker, opt artifact.
return nil, xerrors.Errorf("analyzer group error: %w", err)
}

return Artifact{
art := Artifact{
rootPath: filepath.ToSlash(filepath.Clean(rootPath)),
cache: c,
walker: w,
analyzer: a,
handlerManager: handlerManager,
artifactOption: opt,
}, nil
}

// Check if the directory is a git repository and clean
if hash, err := getCleanGitHash(art.rootPath); err == nil {
art.commitHash = hash
} else {
log.WithPrefix("fs").Debug("Random cache key will be used", log.Err(err))
}

return art, nil
}

// getCleanGitHash opens the git repository located at dir and, provided its
// working tree reports a clean status, returns the hash that HEAD resolves to
// in string form.
//
// An error is returned when the repository cannot be opened, when the
// worktree or its status cannot be obtained, when the status is not clean,
// or when HEAD cannot be resolved. On error the returned string is empty.
func getCleanGitHash(dir string) (string, error) {
	r, err := git.PlainOpen(dir)
	if err != nil {
		return "", xerrors.Errorf("failed to open git repository: %w", err)
	}

	wt, err := r.Worktree()
	if err != nil {
		return "", xerrors.Errorf("failed to get worktree: %w", err)
	}

	// The status check runs before HEAD is resolved, so a dirty tree is
	// reported before any HEAD lookup is attempted.
	st, err := wt.Status()
	if err != nil {
		return "", xerrors.Errorf("failed to get status: %w", err)
	}
	if dirty := !st.IsClean(); dirty {
		return "", xerrors.New("repository is dirty")
	}

	ref, err := r.Head()
	if err != nil {
		return "", xerrors.Errorf("failed to get HEAD: %w", err)
	}

	hash := ref.Hash()
	return hash.String(), nil
}

func (a Artifact) Inspect(ctx context.Context) (artifact.Reference, error) {
Expand Down Expand Up @@ -169,11 +213,20 @@ func (a Artifact) Inspect(ctx context.Context) (artifact.Reference, error) {
}

// Clean deletes the blobs listed in reference.BlobIDs from the artifact cache
// and returns the result of that deletion.
//
// When the artifact carries a commit hash (set only for a clean git
// repository) nothing is deleted and nil is returned; those cache entries are
// keyed by the commit hash, presumably so later scans of the same commit can
// reuse them.
func (a Artifact) Clean(reference artifact.Reference) error {
	// Only artifacts without a commit hash have their blobs removed.
	if a.commitHash == "" {
		return a.cache.DeleteBlobs(reference.BlobIDs)
	}
	return nil
}

func (a Artifact) calcCacheKey(blobInfo types.BlobInfo) (string, error) {
// calculate hash of JSON and use it as pseudo artifactID and blobID
// If this is a clean git repository, use the commit hash as cache key
if a.commitHash != "" {
return cache.CalcKey(a.commitHash, a.analyzer.AnalyzerVersions(), a.handlerManager.Versions(), a.artifactOption)
}

// For non-git repositories or dirty git repositories, use the blob info
h := sha256.New()
if err := json.NewEncoder(h).Encode(blobInfo); err != nil {
return "", xerrors.Errorf("json error: %w", err)
Expand Down
84 changes: 76 additions & 8 deletions pkg/fanal/artifact/repo/git_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ package repo
import (
"context"
"net/http/httptest"
"os"
"path/filepath"
"testing"

"github.com/go-git/go-git/v5"
Expand All @@ -13,8 +15,11 @@ import (

"github.com/aquasecurity/trivy/internal/gittest"
"github.com/aquasecurity/trivy/pkg/cache"
"github.com/aquasecurity/trivy/pkg/fanal/analyzer"
"github.com/aquasecurity/trivy/pkg/fanal/artifact"
"github.com/aquasecurity/trivy/pkg/fanal/handler"
"github.com/aquasecurity/trivy/pkg/fanal/walker"
"github.com/aquasecurity/trivy/pkg/uuid"

_ "github.com/aquasecurity/trivy/pkg/fanal/analyzer/config/all"
_ "github.com/aquasecurity/trivy/pkg/fanal/analyzer/secret"
Expand Down Expand Up @@ -182,31 +187,89 @@ func TestNewArtifact(t *testing.T) {
}

func TestArtifact_Inspect(t *testing.T) {
ts, _ := setupGitRepository(t, "test-repo", "testdata/test-repo")
ts, repo := setupGitRepository(t, "test-repo", "testdata/test-repo")
defer ts.Close()

// Get the HEAD commit hash for verification
head, err := repo.Head()
require.NoError(t, err)
commitHash := head.Hash().String()

a, err := analyzer.NewAnalyzerGroup(analyzer.AnalyzerOptions{})
require.NoError(t, err)

handlerManager, err := handler.NewManager(artifact.Option{})
require.NoError(t, err)

wantCacheKey, err := cache.CalcKey(commitHash, a.AnalyzerVersions(), handlerManager.Versions(), artifact.Option{})
require.NoError(t, err)

worktree, err := repo.Worktree()
require.NoError(t, err)

localPath := worktree.Filesystem.Root()

tests := []struct {
name string
rawurl string
want artifact.Reference
wantErr bool
name string
rawurl string
modifyDir func(t *testing.T, dir string)
want artifact.Reference
wantErr bool
}{
{
name: "happy path",
name: "remote repo",
rawurl: ts.URL + "/test-repo.git",
want: artifact.Reference{
Name: ts.URL + "/test-repo.git",
Type: artifact.TypeRepository,
ID: "sha256:88233504639eb201433a0505956309ba0c48156f45beb786f95ccd3e8a343e9d",
ID: wantCacheKey, // Calculated from commit hash
BlobIDs: []string{
wantCacheKey, // Calculated from commit hash
},
},
},
{
name: "local repo",
rawurl: localPath,
want: artifact.Reference{
Name: localPath,
Type: artifact.TypeRepository,
ID: wantCacheKey, // Calculated from commit hash
BlobIDs: []string{
wantCacheKey, // Calculated from commit hash
},
},
},
{
name: "dirty repository",
rawurl: localPath,
modifyDir: func(t *testing.T, dir string) {
require.NoError(t, os.WriteFile(filepath.Join(dir, "new-file.txt"), []byte("test"), 0644))
t.Cleanup(func() {
require.NoError(t, os.Remove(filepath.Join(dir, "new-file.txt")))
})
},
want: artifact.Reference{
Name: localPath,
Type: artifact.TypeRepository,
ID: "sha256:88233504639eb201433a0505956309ba0c48156f45beb786f95ccd3e8a343e9d", // Calculated from UUID
BlobIDs: []string{
"sha256:88233504639eb201433a0505956309ba0c48156f45beb786f95ccd3e8a343e9d",
"sha256:88233504639eb201433a0505956309ba0c48156f45beb786f95ccd3e8a343e9d", // Calculated from UUID
},
},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Set fake UUID for consistency
uuid.SetFakeUUID(t, "3ff14136-e09f-4df9-80ea-%012d")

// Apply modifications to make the repository dirty if specified
if tt.modifyDir != nil {
tt.modifyDir(t, tt.rawurl)
}

fsCache, err := cache.NewFSCache(t.TempDir())
require.NoError(t, err)

Expand All @@ -215,6 +278,11 @@ func TestArtifact_Inspect(t *testing.T) {
defer cleanup()

ref, err := art.Inspect(context.Background())
if tt.wantErr {
assert.Error(t, err)
return
}

require.NoError(t, err)
assert.Equal(t, tt.want, ref)
})
Expand Down
Loading