From 6f8c2e6fe393195c9d427cd5be65506c4705d929 Mon Sep 17 00:00:00 2001 From: abhisek Date: Sun, 3 Nov 2024 19:15:55 +0530 Subject: [PATCH] fix: PURL handling for GitHub Actions --- pkg/code/code_graph.go | 23 +++++++++++++++++++++-- pkg/common/purl/purl.go | 2 ++ pkg/common/purl/purl_test.go | 9 +++++++++ pkg/reporter/sync.go | 4 ++-- 4 files changed, 34 insertions(+), 4 deletions(-) diff --git a/pkg/code/code_graph.go b/pkg/code/code_graph.go index 5a11199c..75ef0dd4 100644 --- a/pkg/code/code_graph.go +++ b/pkg/code/code_graph.go @@ -9,6 +9,8 @@ import ( "github.com/safedep/vet/pkg/storage/graph" ) +// This should just be a code walker and parser. Code graph is a separate +// concern, much larger and complex. type CodeGraphBuilderConfig struct { // Resolve imports to file and load them RecursiveImport bool @@ -28,9 +30,12 @@ type CodeGraphBuilderMetrics struct { type CodeGraphBuilderEvent struct { Kind string + + // Add structured data here Data interface{} } +// Move this into separate events.go file const ( CodeGraphBuilderEventFileQueued = "file_queued" CodeGraphBuilderEventFileProcessed = "file_processed" @@ -48,8 +53,13 @@ type codeGraphBuilder struct { metrics CodeGraphBuilderMetrics repository SourceRepository - lang SourceLanguage - storage graph.Graph + + // Support multiple languages, may be selected by extension + lang SourceLanguage + + // Decouple this. This should be a separate concern + // We can have a plugin / listener that can build the graph + storage graph.Graph // Queue for processing files fileQueue chan SourceFile @@ -99,6 +109,7 @@ func (b *codeGraphBuilder) Build() error { b.fileQueueWg = &sync.WaitGroup{} b.fileQueueLock = &sync.Mutex{} + // Do we really need these caches? b.fileCache = make(map[string]bool) b.functionDeclCache = make(map[string]string) b.functionCallCache = make(map[string]string) @@ -126,6 +137,8 @@ func (b *codeGraphBuilder) enqueueSourceFile(file SourceFile) { b.synchronized(func() { b.metrics.FilesInQueue++ + // Why do we need to cache files? Are we processing the same file multiple times? + // May be yes when it comes to imports if _, ok := b.fileCache[file.Path]; ok { logger.Debugf("Skipping already processed file: %s", file.Path) return @@ -138,12 +151,15 @@ func (b *codeGraphBuilder) enqueueSourceFile(file SourceFile) { b.fileCache[file.Path] = true }) + // Make this more structured i.e. each Data is in its own + // struct (pointer) b.notifyEventHandlers(CodeGraphBuilderEvent{ Kind: CodeGraphBuilderEventFileQueued, Data: file, }, b.metrics) } +// Add context with cancellation support func (b *codeGraphBuilder) fileProcessor(wg *sync.WaitGroup) { for file := range b.fileQueue { err := b.buildForFile(file) @@ -176,6 +192,8 @@ func (b *codeGraphBuilder) buildForFile(file SourceFile) error { return err } + // Pass CST to analysers / callbacks + defer cst.Close() b.processSourceFileNode(file) @@ -242,6 +260,7 @@ func (b *codeGraphBuilder) processImportNodes(cst *nodes.CST, currentFile Source importedPkgNode := b.buildPackageNode(importNodeName, importNodeName, sourceFile.Path, b.importSourceName(sourceFile)) + // This can be handled by callbacks err = b.storage.Link(thisNode.Imports(&importedPkgNode)) if err != nil { logger.Errorf("Failed to link import node: %v", err) diff --git a/pkg/common/purl/purl.go b/pkg/common/purl/purl.go index 482c1833..aae57889 100644 --- a/pkg/common/purl/purl.go +++ b/pkg/common/purl/purl.go @@ -55,6 +55,8 @@ func purlBuildLockfilePackageName(ecosystem lockfile.Ecosystem, group, name stri return fmt.Sprintf("%s/%s", group, name) case lockfile.MavenEcosystem: return fmt.Sprintf("%s:%s", group, name) + case models.EcosystemGitHubActions: + return fmt.Sprintf("%s/%s", group, name) default: return name } diff --git a/pkg/common/purl/purl_test.go b/pkg/common/purl/purl_test.go index 2b0076bb..c65ffe39 100644 --- a/pkg/common/purl/purl_test.go +++ b/pkg/common/purl/purl_test.go @@ -5,6 +5,7 @@ import ( "testing" "github.com/google/osv-scanner/pkg/lockfile" + "github.com/safedep/vet/pkg/models" "github.com/stretchr/testify/assert" ) @@ -41,6 +42,14 @@ func TestParsePackageUrl(t *testing.T) { "", errors.New("failed to map PURL type:unknown to known ecosystem"), }, + { + "Parse GitHub Actions PURL", + "pkg:actions/github/actions@v2", + lockfile.Ecosystem(models.EcosystemGitHubActions), + "github/actions", + "v2", + nil, + }, } for _, test := range cases { diff --git a/pkg/reporter/sync.go b/pkg/reporter/sync.go index 1bb577e2..7f293407 100644 --- a/pkg/reporter/sync.go +++ b/pkg/reporter/sync.go @@ -344,7 +344,7 @@ func (s *syncReporter) syncEvent(event *analyzer.AnalyzerEvent) error { } logger.Debugf("Report Sync: Publishing policy violation for package: %s/%s/%s/%s", - pkg.GetSpecEcosystem(), pkg.Manifest.GetDisplayPath(), pkg.GetName(), pkg.GetVersion()) + pkg.Manifest.GetControlTowerSpecEcosystem(), pkg.Manifest.GetDisplayPath(), pkg.GetName(), pkg.GetVersion()) namespace := pkg.Manifest.GetSource().GetNamespace() req := controltowerv1.PublishPolicyViolationRequest{ @@ -398,7 +398,7 @@ func (s *syncReporter) syncPackage(pkg *models.Package) error { } logger.Debugf("Report Sync: Publishing package insight for package: %s/%s/%s/%s", - pkg.GetSpecEcosystem(), pkg.Manifest.GetDisplayPath(), pkg.GetName(), pkg.GetVersion()) + pkg.Manifest.GetControlTowerSpecEcosystem(), pkg.Manifest.GetDisplayPath(), pkg.GetName(), pkg.GetVersion()) namespace := pkg.Manifest.GetSource().GetNamespace() req := controltowerv1.PublishPackageInsightRequest{