Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Code Analysis Framework Revamp #269

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions pkg/code/code_graph.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import (
"github.com/safedep/vet/pkg/storage/graph"
)

// This should just be a code walker and parser. The code graph is a separate
// concern that is much larger and more complex.
type CodeGraphBuilderConfig struct {
// Resolve imports to file and load them
RecursiveImport bool
Expand All @@ -28,9 +30,12 @@ type CodeGraphBuilderMetrics struct {

// CodeGraphBuilderEvent describes a single notification that the code graph
// builder hands to its event handlers.
type CodeGraphBuilderEvent struct {
// Kind identifies the event, e.g. CodeGraphBuilderEventFileQueued.
Kind string

// Data is the untyped payload attached to the event; for a file-queued
// event the builder passes the SourceFile that was enqueued.
// TODO: replace with structured, per-event data types.
Data interface{}
}

// Move this into separate events.go file
const (
CodeGraphBuilderEventFileQueued = "file_queued"
CodeGraphBuilderEventFileProcessed = "file_processed"
Expand All @@ -48,8 +53,13 @@ type codeGraphBuilder struct {
metrics CodeGraphBuilderMetrics

repository SourceRepository
lang SourceLanguage
storage graph.Graph

// Support multiple languages, may be selected by extension
lang SourceLanguage

// Decouple this. This should be a separate concern
// We can have a plugin / listener that can build the graph
storage graph.Graph

// Queue for processing files
fileQueue chan SourceFile
Expand Down Expand Up @@ -99,6 +109,7 @@ func (b *codeGraphBuilder) Build() error {
b.fileQueueWg = &sync.WaitGroup{}
b.fileQueueLock = &sync.Mutex{}

// Do we really need these caches?
b.fileCache = make(map[string]bool)
b.functionDeclCache = make(map[string]string)
b.functionCallCache = make(map[string]string)
Expand Down Expand Up @@ -126,6 +137,8 @@ func (b *codeGraphBuilder) enqueueSourceFile(file SourceFile) {
b.synchronized(func() {
b.metrics.FilesInQueue++

// Why do we need to cache files? Are we processing the same file multiple times?
// Maybe yes, when it comes to imports.
if _, ok := b.fileCache[file.Path]; ok {
logger.Debugf("Skipping already processed file: %s", file.Path)
return
Expand All @@ -138,12 +151,15 @@ func (b *codeGraphBuilder) enqueueSourceFile(file SourceFile) {
b.fileCache[file.Path] = true
})

// Make this more structured, i.e. give each event's Data its own
// struct (passed by pointer)
b.notifyEventHandlers(CodeGraphBuilderEvent{
Kind: CodeGraphBuilderEventFileQueued,
Data: file,
}, b.metrics)
}

// Add context with cancellation support
func (b *codeGraphBuilder) fileProcessor(wg *sync.WaitGroup) {
for file := range b.fileQueue {
err := b.buildForFile(file)
Expand Down Expand Up @@ -176,6 +192,8 @@ func (b *codeGraphBuilder) buildForFile(file SourceFile) error {
return err
}

// Pass CST to analysers / callbacks

defer cst.Close()

b.processSourceFileNode(file)
Expand Down Expand Up @@ -242,6 +260,7 @@ func (b *codeGraphBuilder) processImportNodes(cst *nodes.CST, currentFile Source
importedPkgNode := b.buildPackageNode(importNodeName, importNodeName,
sourceFile.Path, b.importSourceName(sourceFile))

// This can be handled by callbacks
err = b.storage.Link(thisNode.Imports(&importedPkgNode))
if err != nil {
logger.Errorf("Failed to link import node: %v", err)
Expand Down
2 changes: 2 additions & 0 deletions pkg/common/purl/purl.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ func purlBuildLockfilePackageName(ecosystem lockfile.Ecosystem, group, name stri
return fmt.Sprintf("%s/%s", group, name)
case lockfile.MavenEcosystem:
return fmt.Sprintf("%s:%s", group, name)
case models.EcosystemGitHubActions:
return fmt.Sprintf("%s/%s", group, name)
default:
return name
}
Expand Down
9 changes: 9 additions & 0 deletions pkg/common/purl/purl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"testing"

"github.com/google/osv-scanner/pkg/lockfile"
"github.com/safedep/vet/pkg/models"
"github.com/stretchr/testify/assert"
)

Expand Down Expand Up @@ -41,6 +42,14 @@ func TestParsePackageUrl(t *testing.T) {
"",
errors.New("failed to map PURL type:unknown to known ecosystem"),
},
{
"Parse GitHub Actions PURL",
"pkg:actions/github/actions@v2",
lockfile.Ecosystem(models.EcosystemGitHubActions),
"github/actions",
"v2",
nil,
},
}

for _, test := range cases {
Expand Down
4 changes: 2 additions & 2 deletions pkg/reporter/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ func (s *syncReporter) syncEvent(event *analyzer.AnalyzerEvent) error {
}

logger.Debugf("Report Sync: Publishing policy violation for package: %s/%s/%s/%s",
pkg.GetSpecEcosystem(), pkg.Manifest.GetDisplayPath(), pkg.GetName(), pkg.GetVersion())
pkg.Manifest.GetControlTowerSpecEcosystem(), pkg.Manifest.GetDisplayPath(), pkg.GetName(), pkg.GetVersion())

namespace := pkg.Manifest.GetSource().GetNamespace()
req := controltowerv1.PublishPolicyViolationRequest{
Expand Down Expand Up @@ -398,7 +398,7 @@ func (s *syncReporter) syncPackage(pkg *models.Package) error {
}

logger.Debugf("Report Sync: Publishing package insight for package: %s/%s/%s/%s",
pkg.GetSpecEcosystem(), pkg.Manifest.GetDisplayPath(), pkg.GetName(), pkg.GetVersion())
pkg.Manifest.GetControlTowerSpecEcosystem(), pkg.Manifest.GetDisplayPath(), pkg.GetName(), pkg.GetVersion())

namespace := pkg.Manifest.GetSource().GetNamespace()
req := controltowerv1.PublishPackageInsightRequest{
Expand Down
Loading