Smarter Chains: check taskrun level results for Subjects
Step 1/2 of #850

Previously, Chains only looked at pipeline-level results to understand
which artifacts were generated in a pipeline. That meant pipeline authors
had to name pipeline results following the type-hinting convention and
propagate their values from individual TaskRun results.
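
As a rough sketch (the task name `build` and the result names are made up
for illustration), the old approach required pipeline results like:

    results:
      - name: APP_IMAGE_URL
        value: $(tasks.build.results.APP_IMAGE_URL)
      - name: APP_IMAGE_DIGEST
        value: $(tasks.build.results.APP_IMAGE_DIGEST)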

Now, Chains can dive into individual TaskRun results to understand which
artifacts were generated throughout a pipeline. This way, pipeline authors
no longer need to worry about these rules when writing a pipeline, as long
as they pull in the right tasks that produce type-hinting results.
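
For example (a sketch with made-up result names, reusing an image and
digest from the test data), a child TaskRun emitting results like the
following is now enough on its own:

    taskResults:
      - name: APP_IMAGE_URL
        value: gcr.io/myimage1
      - name: APP_IMAGE_DIGEST
        value: sha256:d4b63d3e24d6eef04a6dc0795cf8a73470688803d97c52cffa3c8d4efd3397b6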

That said, the old behaviour of observing pipeline-level results is
preserved by introducing a configmap field `artifacts.pipelinerun.observe-mode`,
which allows configuring how Chains observes the outputs.

Signed-off-by: Chuang Wang <[email protected]>
chuangw6 committed Aug 2, 2023
1 parent 25e7a6c commit c58bf20
Showing 24 changed files with 322 additions and 51 deletions.
6 changes: 4 additions & 2 deletions docs/config.md
@@ -64,9 +64,11 @@ Supported keys include:
| `artifacts.pipelinerun.format` | The format to store `PipelineRun` payloads in. | `in-toto`, `slsa/v1`| `in-toto` |
| `artifacts.pipelinerun.storage` | The storage backend to store `PipelineRun` signatures in. Multiple backends can be specified with comma-separated list ("tekton,oci"). To disable the `PipelineRun` artifact input an empty string (""). | `tekton`, `oci`, `gcs`, `docdb`, `grafeas` | `tekton` |
| `artifacts.pipelinerun.signer` | The signature backend to sign `PipelineRun` payloads with. | `x509`, `kms` | `x509` |
| `artifacts.pipelinerun.observe-mode` | The way that Chains observes inputs & outputs of a PipelineRun. The default option `pr` configures Chains to only inspect Pipeline level params/results, whereas the option `tr` configures Chains to dive into child TaskRuns. | `pr`, `tr` | `pr` |

> NOTE: For the grafeas storage backend, we currently only support Container Analysis. We will make the grafeas server address configurable soon.
> NOTE: `slsa/v1` is an alias of `in-toto` for backwards compatibility.
> NOTE:
> - For the grafeas storage backend, we currently only support Container Analysis. We will make the grafeas server address configurable soon.
> - `slsa/v1` is an alias of `in-toto` for backwards compatibility.
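
For example, to switch Chains to task-level observation, set the field in the Chains ConfigMap. This is a sketch; the ConfigMap name `chains-config` and the `tekton-chains` namespace are the assumed defaults of a standard installation:

```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: chains-config      # assumed default ConfigMap name
  namespace: tekton-chains # assumed install namespace
data:
  artifacts.pipelinerun.observe-mode: "tr" # "pr" (default) or "tr"
```
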
### OCI Configuration

98 changes: 91 additions & 7 deletions pkg/chains/formats/slsa/extract/extract.go
@@ -27,19 +27,105 @@ import (
"github.com/in-toto/in-toto-golang/in_toto/slsa_provenance/common"
"github.com/tektoncd/chains/internal/backport"
"github.com/tektoncd/chains/pkg/artifacts"
"github.com/tektoncd/chains/pkg/chains/formats/slsa/internal/slsaconfig"
"github.com/tektoncd/chains/pkg/chains/objects"
"github.com/tektoncd/pipeline/pkg/apis/pipeline/v1beta1"
"knative.dev/pkg/logging"
)

// SubjectDigests returns software artifacts produced from the TaskRun/PipelineRun object
// in the form of standard subject field of intoto statement.
// The type hinting fields expected in results help identify the generated software artifacts.
// The type hinting fields expected in TaskRun results help identify the generated software artifacts in a TaskRun/PipelineRun.
// Valid type hinting fields must:
// - come in pairs with the suffixes `IMAGE_URL` & `IMAGE_DIGEST` or `ARTIFACT_URI` & `ARTIFACT_DIGEST`.
// - the `*_DIGEST` field must be in the format of "<algorithm>:<actual-sha>" where the algorithm must be "sha256" and actual sha must be valid per https://github.com/opencontainers/image-spec/blob/main/descriptor.md#sha-256.
// - the `*_URL` or `*_URI` fields cannot be empty.
func SubjectDigests(ctx context.Context, obj objects.TektonObject) []intoto.Subject {
func SubjectDigests(ctx context.Context, obj objects.TektonObject, slsaconfig *slsaconfig.SlsaConfig) []intoto.Subject {
var subjects []intoto.Subject

switch obj.GetObject().(type) {
case *v1beta1.PipelineRun:

// GitHub Actions lint: SA1019: v1beta1.PipelineRun is deprecated: Please use v1.PipelineRun instead. (staticcheck)
subjects = subjectsFromPipelineRun(ctx, obj, slsaconfig)
case *v1beta1.TaskRun:

// GitHub Actions lint: SA1019: v1beta1.TaskRun is deprecated: Please use v1.TaskRun instead. (staticcheck)
subjects = subjectsFromTektonObject(ctx, obj)
}

sort.Slice(subjects, func(i, j int) bool {
return subjects[i].Name <= subjects[j].Name
})

return subjects
}

func subjectsFromPipelineRun(ctx context.Context, obj objects.TektonObject, slsaconfig *slsaconfig.SlsaConfig) []intoto.Subject {
logger := logging.FromContext(ctx)
// If the configured input/output observation mode is pipeline level, then
// call the generic function to parse the subject.
if slsaconfig.PrObserveMode == "pr" {
return subjectsFromTektonObject(ctx, obj)
}

// If the configured input/output observation mode is task level, then dive into
// individual taskruns and collect subjects.

var result []intoto.Subject

pro := obj.(*objects.PipelineRunObject)

pSpec := pro.Status.PipelineSpec
if pSpec != nil {
pipelineTasks := append(pSpec.Tasks, pSpec.Finally...)
for _, t := range pipelineTasks {
tr := pro.GetTaskRunFromTask(t.Name)
// Ignore Tasks that did not execute during the PipelineRun.
if tr == nil || tr.Status.CompletionTime == nil {
logger.Infof("taskrun status not found for task %s", t.Name)
continue
}

trSubjects := subjectsFromTektonObject(ctx, objects.NewTaskRunObject(tr))
for _, s := range trSubjects {
result = addSubject(result, s)
}
}
}

return result
}

// addSubject adds a new subject item to the original slice.
func addSubject(original []intoto.Subject, item intoto.Subject) []intoto.Subject {

for i, s := range original {
// If there is an equivalent entry in the original slice, do nothing, or replace
// the original entry with the item if the item has a richer digest set.
if subjectEqual(s, item) {
if len(s.Digest) < len(item.Digest) {
original[i] = item
}
return original
}
}

original = append(original, item)
return original
}

// Two subjects are equal if and only if they have the same name and share at least
// one common algorithm and hex value.
func subjectEqual(x, y intoto.Subject) bool {
if x.Name != y.Name {
return false
}
for algo, hex := range x.Digest {
if v, ok := y.Digest[algo]; ok && v == hex {
return true
}
}
return false
}

func subjectsFromTektonObject(ctx context.Context, obj objects.TektonObject) []intoto.Subject {
logger := logging.FromContext(ctx)
var subjects []intoto.Subject

@@ -121,19 +207,17 @@ func SubjectDigests(ctx context.Context, obj objects.TektonObject) []intoto.Subj
})
}
}
sort.Slice(subjects, func(i, j int) bool {
return subjects[i].Name <= subjects[j].Name
})

return subjects
}

// RetrieveAllArtifactURIs returns all the URIs of the software artifacts produced from the run object.
// - It first extracts intoto subjects from run object results and converts the subjects
// to a slice of string URIs in the format of "NAME" + "@" + "ALGORITHM" + ":" + "DIGEST".
// - If no subjects could be extracted from results, then an empty slice is returned.
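// For example (illustrative values reusing a digest from the test data): "gcr.io/myimage1@sha256:d4b63d3e24d6eef04a6dc0795cf8a73470688803d97c52cffa3c8d4efd3397b6".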
func RetrieveAllArtifactURIs(ctx context.Context, obj objects.TektonObject) []string {
func RetrieveAllArtifactURIs(ctx context.Context, obj objects.TektonObject, observeMode string) []string {
result := []string{}
subjects := SubjectDigests(ctx, obj)
subjects := SubjectDigests(ctx, obj, &slsaconfig.SlsaConfig{PrObserveMode: observeMode})

for _, s := range subjects {
for algo, digest := range s.Digest {
156 changes: 150 additions & 6 deletions pkg/chains/formats/slsa/extract/extract_test.go
@@ -19,13 +19,16 @@ package extract_test
import (
"fmt"
"testing"
"time"

"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
intoto "github.com/in-toto/in-toto-golang/in_toto"
"github.com/tektoncd/chains/pkg/chains/formats/slsa/extract"
"github.com/tektoncd/chains/pkg/chains/formats/slsa/internal/slsaconfig"
"github.com/tektoncd/chains/pkg/chains/objects"
"github.com/tektoncd/pipeline/pkg/apis/pipeline/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
logtesting "knative.dev/pkg/logging/testing"
)

@@ -102,16 +105,15 @@ func TestSubjectDigestsAndRetrieveAllArtifactURIs(t *testing.T) {
// test both taskrun object and pipelinerun object
runObjects := []objects.TektonObject{
createTaskRunObjectWithResults(tc.results),
createPipelineRunObjectWithResults(tc.results),
createPipelineRunObjectWithPipelineResults(tc.results),
}

for _, o := range runObjects {
gotSubjects := extract.SubjectDigests(ctx, o)
gotSubjects := extract.SubjectDigests(ctx, o, &slsaconfig.SlsaConfig{PrObserveMode: "pr"})
if diff := cmp.Diff(tc.wantSubjects, gotSubjects, cmpopts.SortSlices(func(x, y intoto.Subject) bool { return x.Name < y.Name })); diff != "" {
t.Errorf("Wrong subjects extracted, diff=%s", diff)
t.Errorf("Wrong subjects extracted, diff=%s, %s", diff, gotSubjects)
}

gotURIs := extract.RetrieveAllArtifactURIs(ctx, o)
gotURIs := extract.RetrieveAllArtifactURIs(ctx, o, "pr")
if diff := cmp.Diff(tc.wantFullURLs, gotURIs, cmpopts.SortSlices(func(x, y string) bool { return x < y })); diff != "" {
t.Errorf("Wrong URIs extracted, diff=%s", diff)
}
@@ -121,6 +123,107 @@
}
}

func TestPipelineRunObserveModeForSubjects(t *testing.T) {
var tests = []struct {
name string
pro objects.TektonObject
observeMode string
wantSubjects []intoto.Subject
wantFullURLs []string
}{
{
name: "observe mode: pr",
pro: createPipelineRunObjectWithPipelineResults(map[string]string{artifactURL1: "sha256:" + artifactDigest1}),
observeMode: "pr",
wantSubjects: []intoto.Subject{
{
Name: artifactURL1,
Digest: map[string]string{
"sha256": artifactDigest1,
},
},
},
wantFullURLs: []string{fmt.Sprintf("%s@sha256:%s", artifactURL1, artifactDigest1)},
},
{
name: "observe mode: tr, no duplication",
pro: createPipelineRunObjectWithTaskRunResults([]artifact{{uri: artifactURL2, digest: "sha256:" + artifactDigest2}}),
observeMode: "tr",
wantSubjects: []intoto.Subject{
{
Name: artifactURL2,
Digest: map[string]string{
"sha256": artifactDigest2,
},
},
},
wantFullURLs: []string{fmt.Sprintf("%s@sha256:%s", artifactURL2, artifactDigest2)},
},
{
name: "observe mode: tr - same uri with different sha256 digests",
pro: createPipelineRunObjectWithTaskRunResults([]artifact{
{uri: artifactURL2, digest: "sha256:" + artifactDigest1},
{uri: artifactURL2, digest: "sha256:" + artifactDigest2},
}),
observeMode: "tr",
wantSubjects: []intoto.Subject{
{
Name: artifactURL2,
Digest: map[string]string{
"sha256": artifactDigest2,
},
},
{
Name: artifactURL2,
Digest: map[string]string{
"sha256": artifactDigest1,
},
},
},
wantFullURLs: []string{
fmt.Sprintf("%s@sha256:%s", artifactURL2, artifactDigest1),
fmt.Sprintf("%s@sha256:%s", artifactURL2, artifactDigest2),
},
},
{
name: "observe mode: tr - same uri with same sha256 digests",
pro: createPipelineRunObjectWithTaskRunResults([]artifact{
{uri: artifactURL2, digest: "sha256:" + artifactDigest2},
{uri: artifactURL2, digest: "sha256:" + artifactDigest2},
}),
observeMode: "tr",
wantSubjects: []intoto.Subject{
{
Name: artifactURL2,
Digest: map[string]string{
"sha256": artifactDigest2,
},
},
},
wantFullURLs: []string{
fmt.Sprintf("%s@sha256:%s", artifactURL2, artifactDigest2),
},
},
}

for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
ctx := logtesting.TestContextWithLogger(t)
// test both taskrun object and pipelinerun object

gotSubjects := extract.SubjectDigests(ctx, tc.pro, &slsaconfig.SlsaConfig{PrObserveMode: tc.observeMode})
if diff := cmp.Diff(tc.wantSubjects, gotSubjects, cmpopts.SortSlices(func(x, y intoto.Subject) bool { return x.Name < y.Name })); diff != "" {
t.Errorf("Wrong subjects extracted, diff=%s, %s", diff, gotSubjects)
}

gotURIs := extract.RetrieveAllArtifactURIs(ctx, tc.pro, tc.observeMode)
if diff := cmp.Diff(tc.wantFullURLs, gotURIs, cmpopts.SortSlices(func(x, y string) bool { return x < y })); diff != "" {
t.Errorf("Wrong URIs extracted, diff=%s", diff)
}
})
}
}

func createTaskRunObjectWithResults(results map[string]string) objects.TektonObject {
trResults := []v1beta1.TaskRunResult{}
prefix := 0
@@ -143,7 +246,7 @@ func createTaskRunObjectWithResults(results map[string]string) objects.TektonObj
)
}

func createPipelineRunObjectWithResults(results map[string]string) objects.TektonObject {
func createPipelineRunObjectWithPipelineResults(results map[string]string) objects.TektonObject {
prResults := []v1beta1.PipelineRunResult{}
prefix := 0
for url, digest := range results {
@@ -164,3 +267,44 @@ func createPipelineRunObjectWithResults(results map[string]string) objects.Tekto
},
)
}

type artifact struct {
uri string
digest string
}

// create a child taskrun for each result
func createPipelineRunObjectWithTaskRunResults(results []artifact) objects.TektonObject {
pro := objects.NewPipelineRunObject(&v1beta1.PipelineRun{

// GitHub Actions lint: SA1019: v1beta1.PipelineRun is deprecated: Please use v1.PipelineRun instead. (staticcheck)
Status: v1beta1.PipelineRunStatus{
PipelineRunStatusFields: v1beta1.PipelineRunStatusFields{
PipelineSpec: &v1beta1.PipelineSpec{},
},
},
})

// create child taskruns with results and pipelinetask
prefix := 0
for _, r := range results {
// simulate child taskruns
pipelineTaskName := fmt.Sprintf("task-%d", prefix)
tr := &v1beta1.TaskRun{

// GitHub Actions lint: SA1019: v1beta1.TaskRun is deprecated: Please use v1.TaskRun instead. (staticcheck)
ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{objects.PipelineTaskLabel: pipelineTaskName}},
Status: v1beta1.TaskRunStatus{
TaskRunStatusFields: v1beta1.TaskRunStatusFields{
CompletionTime: &metav1.Time{Time: time.Date(1995, time.December, 24, 6, 12, 12, 24, time.UTC)},
TaskRunResults: []v1beta1.TaskRunResult{
{Name: fmt.Sprintf("%v_IMAGE_DIGEST", prefix), Value: *v1beta1.NewStructuredValues(r.digest)},
{Name: fmt.Sprintf("%v_IMAGE_URL", prefix), Value: *v1beta1.NewStructuredValues(r.uri)},
},
},
},
}

pro.AppendTaskRun(tr)
pro.Status.PipelineSpec.Tasks = append(pro.Status.PipelineSpec.Tasks, v1beta1.PipelineTask{Name: pipelineTaskName})
prefix++
}

return pro
}
2 changes: 2 additions & 0 deletions pkg/chains/formats/slsa/internal/slsaconfig/slsaconfig.go
@@ -18,4 +18,6 @@ package slsaconfig
type SlsaConfig struct {
// BuilderID is the URI of the trusted build platform.
BuilderID string
// PrObserveMode configures whether to observe the pipeline level or task level inputs/outputs for a given pipelinerun.
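// Valid values are "pr" (observe pipeline-level results, the default) and "tr" (observe TaskRun-level results).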
PrObserveMode string
}
@@ -28,7 +28,7 @@
"taskResults": [
{
"name": "IMAGES",
"value": "gcr.io/myimage@sha256:d4b63d3e24d6eef04a6dc0795cf8a73470688803d97c52cffa3c8d4efd3397b6,gcr.io/myimage@sha256:daa1a56e13c85cf164e7d9e595006649e3a04c47fe4a8261320e18a0bf3b0367"
"value": "gcr.io/myimage1@sha256:d4b63d3e24d6eef04a6dc0795cf8a73470688803d97c52cffa3c8d4efd3397b6,gcr.io/myimage2@sha256:daa1a56e13c85cf164e7d9e595006649e3a04c47fe4a8261320e18a0bf3b0367"
}
],
"taskSpec": {
@@ -28,7 +28,7 @@
"taskResults": [
{
"name": "IMAGES",
"value": "gcr.io/myimage@sha256:d4b63d3e24d6eef04a6dc0795cf8a73470688803d97c52cffa3c8d4efd3397b6,gcr.io/myimage@sha256:daa1a56e13c85cf164e7d9e595006649e3a04c47fe4a8261320e18a0bf3b0367"
"value": "gcr.io/myimage1@sha256:d4b63d3e24d6eef04a6dc0795cf8a73470688803d97c52cffa3c8d4efd3397b6,gcr.io/myimage2@sha256:daa1a56e13c85cf164e7d9e595006649e3a04c47fe4a8261320e18a0bf3b0367"
}
],
"taskSpec": {
3 changes: 2 additions & 1 deletion pkg/chains/formats/slsa/v1/intotoite6.go
@@ -45,7 +45,8 @@ type InTotoIte6 struct {
func NewFormatter(cfg config.Config) (formats.Payloader, error) {
return &InTotoIte6{
slsaConfig: &slsaconfig.SlsaConfig{
BuilderID: cfg.Builder.ID,
BuilderID: cfg.Builder.ID,
PrObserveMode: cfg.Artifacts.PipelineRuns.ObserveMode,
},
}, nil
}