Skip to content

Commit

Permalink
refactor: move gitoid code to cryptoutil, use digestvalue everywhere
Browse files Browse the repository at this point in the history
When the functionality to calculate gitoids was added, there was a bit
of tech debt incurred since they didn't implement hash.Hash. This
remedies this with an admittedly hacky implementation of hash.Hash that
wraps the gitoid code. This also standardizes our cryptoutil functions
around the DigestValue struct that was added around this time to
differentiate between gitoids and regular hash functions.

Signed-off-by: Mikhail Swift <[email protected]>
  • Loading branch information
mikhailswift committed Jan 29, 2024
1 parent 2cb096b commit f91a79c
Show file tree
Hide file tree
Showing 15 changed files with 130 additions and 60 deletions.
4 changes: 2 additions & 2 deletions attestation/aws-iid/aws-iid.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ func init() {

type Attestor struct {
ec2metadata.EC2InstanceIdentityDocument
hashes []crypto.Hash
hashes []cryptoutil.DigestValue
session session.Session
conf *aws.Config
RawIID string `json:"rawiid"`
Expand Down Expand Up @@ -195,7 +195,7 @@ func (a *Attestor) Verify() error {
}

func (a *Attestor) Subjects() map[string]cryptoutil.DigestSet {
hashes := []crypto.Hash{crypto.SHA256}
hashes := []cryptoutil.DigestValue{{Hash: crypto.SHA256}}
subjects := make(map[string]cryptoutil.DigestSet)
if ds, err := cryptoutil.CalculateDigestSetFromBytes([]byte(a.EC2InstanceIdentityDocument.InstanceID), hashes); err == nil {
subjects[fmt.Sprintf("instanceid:%s", a.EC2InstanceIdentityDocument.InstanceID)] = ds
Expand Down
3 changes: 1 addition & 2 deletions attestation/commandrun/tracing_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ package commandrun

import (
"bytes"
"crypto"
"fmt"
"os"
"os/exec"
Expand All @@ -42,7 +41,7 @@ type ptraceContext struct {
mainProgram string
processes map[int]*ProcessInfo
exitCode int
hash []crypto.Hash
hash []cryptoutil.DigestValue
environmentBlockList map[string]struct{}
}

Expand Down
10 changes: 5 additions & 5 deletions attestation/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ func WithContext(ctx context.Context) AttestationContextOption {
}
}

func WithHashes(hashes []crypto.Hash) AttestationContextOption {
func WithHashes(hashes []cryptoutil.DigestValue) AttestationContextOption {
return func(ctx *AttestationContext) {
if len(hashes) > 0 {
ctx.hashes = hashes
Expand All @@ -83,7 +83,7 @@ type AttestationContext struct {
ctx context.Context
attestors []Attestor
workingDir string
hashes []crypto.Hash
hashes []cryptoutil.DigestValue
completedAttestors []CompletedAttestor
products map[string]Product
materials map[string]cryptoutil.DigestSet
Expand All @@ -104,7 +104,7 @@ func NewContext(attestors []Attestor, opts ...AttestationContextOption) (*Attest
ctx: context.Background(),
attestors: attestors,
workingDir: wd,
hashes: []crypto.Hash{crypto.SHA256},
hashes: []cryptoutil.DigestValue{{Hash: crypto.SHA256}, {Hash: crypto.SHA256, GitOID: true}, {Hash: crypto.SHA1, GitOID: true}},
materials: make(map[string]cryptoutil.DigestSet),
products: make(map[string]Product),
}
Expand Down Expand Up @@ -222,8 +222,8 @@ func (ctx *AttestationContext) WorkingDir() string {
return ctx.workingDir
}

func (ctx *AttestationContext) Hashes() []crypto.Hash {
hashes := make([]crypto.Hash, len(ctx.hashes))
func (ctx *AttestationContext) Hashes() []cryptoutil.DigestValue {
hashes := make([]cryptoutil.DigestValue, len(ctx.hashes))
copy(hashes, ctx.hashes)
return hashes
}
Expand Down
29 changes: 1 addition & 28 deletions attestation/file/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,18 @@
package file

import (
"crypto"
"io/fs"
"os"
"path/filepath"

"github.com/edwarnicke/gitoid"
"github.com/in-toto/go-witness/cryptoutil"
"github.com/in-toto/go-witness/log"
)

// recordArtifacts will walk basePath and record the digests of each file with each of the functions in hashes.
// If file already exists in baseArtifacts and the two artifacts are equal the artifact will not be in the
// returned map of artifacts.
func RecordArtifacts(basePath string, baseArtifacts map[string]cryptoutil.DigestSet, hashes []crypto.Hash, visitedSymlinks map[string]struct{}) (map[string]cryptoutil.DigestSet, error) {
func RecordArtifacts(basePath string, baseArtifacts map[string]cryptoutil.DigestSet, hashes []cryptoutil.DigestValue, visitedSymlinks map[string]struct{}) (map[string]cryptoutil.DigestSet, error) {
artifacts := make(map[string]cryptoutil.DigestSet)
err := filepath.Walk(basePath, func(path string, info fs.FileInfo, err error) error {
if err != nil {
Expand Down Expand Up @@ -80,31 +78,6 @@ func RecordArtifacts(basePath string, baseArtifacts map[string]cryptoutil.Digest
return err
}

fileReader, err := os.Open(path)
if err != nil {
return err
}

goidSha1, err := gitoid.New(fileReader)
if err != nil {
return err
}

goidSha256, err := gitoid.New(fileReader, gitoid.WithSha256())
if err != nil {
return err
}

artifact[cryptoutil.DigestValue{
Hash: crypto.SHA1,
GitOID: true,
}] = goidSha1.URI()

artifact[cryptoutil.DigestValue{
Hash: crypto.SHA256,
GitOID: true,
}] = goidSha256.URI()

if shouldRecord(relPath, artifact, baseArtifacts) {
artifacts[relPath] = artifact
}
Expand Down
6 changes: 3 additions & 3 deletions attestation/file/file_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,13 @@ func TestBrokenSymlink(t *testing.T) {
symTestDir := filepath.Join(dir, "symTestDir")
require.NoError(t, os.Symlink(testDir, symTestDir))

_, err := RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []crypto.Hash{crypto.SHA256}, map[string]struct{}{})
_, err := RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{})
require.NoError(t, err)

// remove the symlinks and make sure we don't get an error back
require.NoError(t, os.RemoveAll(testDir))
require.NoError(t, os.RemoveAll(testFile))
_, err = RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []crypto.Hash{crypto.SHA256}, map[string]struct{}{})
_, err = RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{})
require.NoError(t, err)
}

Expand All @@ -58,6 +58,6 @@ func TestSymlinkCycle(t *testing.T) {
require.NoError(t, os.Symlink(dir, symTestDir))

// if a symlink cycle weren't properly handled this would be an infinite loop
_, err := RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []crypto.Hash{crypto.SHA256}, map[string]struct{}{})
_, err := RecordArtifacts(dir, map[string]cryptoutil.DigestSet{}, []cryptoutil.DigestValue{{Hash: crypto.SHA256}}, map[string]struct{}{})
require.NoError(t, err)
}
2 changes: 1 addition & 1 deletion attestation/gcp-iit/gcp-iit.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ func (a *Attestor) getInstanceData() {

func (a *Attestor) Subjects() map[string]cryptoutil.DigestSet {
subjects := make(map[string]cryptoutil.DigestSet)
hashes := []crypto.Hash{crypto.SHA256}
hashes := []cryptoutil.DigestValue{{Hash: crypto.SHA256}}
if ds, err := cryptoutil.CalculateDigestSetFromBytes([]byte(a.InstanceID), hashes); err == nil {
subjects[fmt.Sprintf("instanceid:%v", a.InstanceID)] = ds
} else {
Expand Down
2 changes: 1 addition & 1 deletion attestation/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ func (a *Attestor) Attest(ctx *attestation.AttestationContext) error {

func (a *Attestor) Subjects() map[string]cryptoutil.DigestSet {
subjects := make(map[string]cryptoutil.DigestSet)
hashes := []crypto.Hash{crypto.SHA256}
hashes := []cryptoutil.DigestValue{{Hash: crypto.SHA256}}

subjectName := fmt.Sprintf("commithash:%v", a.CommitHash)
subjects[subjectName] = cryptoutil.DigestSet{
Expand Down
2 changes: 1 addition & 1 deletion attestation/github/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ func (a *Attestor) Attest(ctx *attestation.AttestationContext) error {
// Subjects returns a map of subjects and their corresponding digest sets.
func (a *Attestor) Subjects() map[string]cryptoutil.DigestSet {
subjects := make(map[string]cryptoutil.DigestSet)
hashes := []crypto.Hash{crypto.SHA256}
hashes := []cryptoutil.DigestValue{{Hash: crypto.SHA256}}
if pipelineSubj, err := cryptoutil.CalculateDigestSetFromBytes([]byte(a.PipelineUrl), hashes); err == nil {
subjects[fmt.Sprintf("pipelineurl:%v", a.PipelineUrl)] = pipelineSubj
} else {
Expand Down
2 changes: 1 addition & 1 deletion attestation/gitlab/gitlab.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ func (a *Attestor) Attest(ctx *attestation.AttestationContext) error {

func (a *Attestor) Subjects() map[string]cryptoutil.DigestSet {
subjects := make(map[string]cryptoutil.DigestSet)
hashes := []crypto.Hash{crypto.SHA256}
hashes := []cryptoutil.DigestValue{{Hash: crypto.SHA256}}
if ds, err := cryptoutil.CalculateDigestSetFromBytes([]byte(a.PipelineUrl), hashes); err == nil {
subjects[fmt.Sprintf("pipelineurl:%v", a.PipelineUrl)] = ds
} else {
Expand Down
2 changes: 1 addition & 1 deletion attestation/maven/maven.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ func (a *Attestor) Attest(ctx *attestation.AttestationContext) error {

func (a *Attestor) Subjects() map[string]cryptoutil.DigestSet {
subjects := make(map[string]cryptoutil.DigestSet)
hashes := []crypto.Hash{crypto.SHA256}
hashes := []cryptoutil.DigestValue{{Hash: crypto.SHA256}}
projectSubject := fmt.Sprintf("project:%v/%v@%v", a.GroupId, a.ArtifactId, a.Version)
if ds, err := cryptoutil.CalculateDigestSetFromBytes([]byte(projectSubject), hashes); err == nil {
subjects[projectSubject] = ds
Expand Down
2 changes: 1 addition & 1 deletion attestation/oci/oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ func (a *Attestor) parseMaifest(ctx *attestation.AttestationContext) error {
}

func (a *Attestor) Subjects() map[string]cryptoutil.DigestSet {
hashes := []crypto.Hash{crypto.SHA256}
hashes := []cryptoutil.DigestValue{{Hash: crypto.SHA256}}
subj := make(map[string]cryptoutil.DigestSet)
subj[fmt.Sprintf("manifestdigest:%s", a.ManifestDigest[cryptoutil.DigestValue{Hash: crypto.SHA256}])] = a.ManifestDigest
subj[fmt.Sprintf("tardigest:%s", a.TarDigest[cryptoutil.DigestValue{Hash: crypto.SHA256}])] = a.TarDigest
Expand Down
2 changes: 1 addition & 1 deletion attestation/oci/oci_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ func TestAttestor_Attest(t *testing.T) {
t.Fatal(err)
}

hashes := []crypto.Hash{crypto.SHA256}
hashes := []cryptoutil.DigestValue{{Hash: crypto.SHA256}}

tarDigest, err := cryptoutil.CalculateDigestSetFromBytes([]byte(decoded), hashes)
if err != nil {
Expand Down
4 changes: 2 additions & 2 deletions attestation/product/product_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import (
)

func TestFromDigestMap(t *testing.T) {
testDigest, err := cryptoutil.CalculateDigestSetFromBytes([]byte("test"), []crypto.Hash{crypto.SHA256})
testDigest, err := cryptoutil.CalculateDigestSetFromBytes([]byte("test"), []cryptoutil.DigestValue{{Hash: crypto.SHA256}})
assert.NoError(t, err)
testDigestSet := make(map[string]cryptoutil.DigestSet)
testDigestSet["test"] = testDigest
Expand All @@ -57,7 +57,7 @@ func TestAttestorRunType(t *testing.T) {

func TestAttestorAttest(t *testing.T) {
a := New()
testDigest, err := cryptoutil.CalculateDigestSetFromBytes([]byte("test"), []crypto.Hash{crypto.SHA256})
testDigest, err := cryptoutil.CalculateDigestSetFromBytes([]byte("test"), []cryptoutil.DigestValue{{Hash: crypto.SHA256}})
if err != nil {
t.Errorf("Failed to calculate digest set from bytes: %v", err)
}
Expand Down
35 changes: 24 additions & 11 deletions cryptoutil/digestset.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,14 @@ type DigestValue struct {
GitOID bool
}

// New returns the hash.Hash used to compute this digest value. Regular digest
// values use the hash function's own constructor; digest values flagged as
// GitOID get a gitoidHasher configured with the same hash function.
func (dv DigestValue) New() hash.Hash {
	if !dv.GitOID {
		return dv.Hash.New()
	}

	return &gitoidHasher{buf: new(bytes.Buffer), hash: dv.Hash}
}

type DigestSet map[DigestValue]string

func HashToString(h crypto.Hash) (string, error) {
Expand Down Expand Up @@ -142,13 +150,13 @@ func NewDigestSet(digestsByName map[string]string) (DigestSet, error) {
return ds, nil
}

func CalculateDigestSet(r io.Reader, hashes []crypto.Hash) (DigestSet, error) {
func CalculateDigestSet(r io.Reader, digestValues []DigestValue) (DigestSet, error) {
digestSet := make(DigestSet)
writers := []io.Writer{}
hashfuncs := map[crypto.Hash]hash.Hash{}
for _, hash := range hashes {
hashfunc := hash.New()
hashfuncs[hash] = hashfunc
hashfuncs := map[DigestValue]hash.Hash{}
for _, digestValue := range digestValues {
hashfunc := digestValue.New()
hashfuncs[digestValue] = hashfunc
writers = append(writers, hashfunc)
}

Expand All @@ -157,21 +165,26 @@ func CalculateDigestSet(r io.Reader, hashes []crypto.Hash) (DigestSet, error) {
return digestSet, err
}

for hash, hashfunc := range hashfuncs {
digestValue := DigestValue{
Hash: hash,
GitOID: false,
for digestValue, hashfunc := range hashfuncs {
// gitoids are somewhat special... we're using a custom implementation of hash.Hash
// to wrap the gitoid library. Sum will return a gitoid URI, so we don't want to hex
// encode it as it's already a string with a hex encoded hash.
if digestValue.GitOID {
digestSet[digestValue] = string(hashfunc.Sum(nil))
continue
}

digestSet[digestValue] = string(HexEncode(hashfunc.Sum(nil)))
}

return digestSet, nil
}

func CalculateDigestSetFromBytes(data []byte, hashes []crypto.Hash) (DigestSet, error) {
func CalculateDigestSetFromBytes(data []byte, hashes []DigestValue) (DigestSet, error) {
return CalculateDigestSet(bytes.NewReader(data), hashes)
}

func CalculateDigestSetFromFile(path string, hashes []crypto.Hash) (DigestSet, error) {
func CalculateDigestSetFromFile(path string, hashes []DigestValue) (DigestSet, error) {
file, err := os.Open(path)
if err != nil {
return DigestSet{}, err
Expand Down
85 changes: 85 additions & 0 deletions cryptoutil/gitoid.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Copyright 2023 The Witness Contributors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cryptoutil

import (
"bytes"
"crypto"
"encoding/hex"
"fmt"

"github.com/edwarnicke/gitoid"
)

// gitoidHasher wraps the gitoid library behind the hash.Hash method set (Write, Sum, Reset, Size
// and BlockSize) so we can generate gitoids with our CalculateDigestSet function.
// CalculateDigestSet takes in an io.Reader pointing to some data we want to hash, and writes it to a
// MultiWriter that forwards it to writers for each hash we wish to calculate.
// This is a bit hacky -- every byte written is kept in an internal buffer, and the gitoid is only
// calculated when Sum is called. We may be able to contribute to the gitoid library to make this smoother.
type gitoidHasher struct {
// buf accumulates the data passed to Write until a gitoid is requested via Sum.
buf *bytes.Buffer
// hash selects the gitoid hash function; Sum only enables the SHA256 option for crypto.SHA256
// and otherwise leaves the gitoid library on its default.
hash crypto.Hash
}

// Write implements the io.Writer interface. The bytes in p are appended to the
// internal buffer; no hashing happens until Sum is called.
func (gh *gitoidHasher) Write(p []byte) (n int, err error) {
	n, err = gh.buf.Write(p)
	return n, err
}

// Sum appends the gitoid URI of everything written so far to b and returns the
// resulting slice. Unlike a conventional hash.Hash, the appended bytes are the
// textual gitoid URI rather than a raw digest, so callers must not hex encode
// the result.
//
// Sum does not change the underlying hash state: the buffered data is read
// through a separate reader, so Sum may be called repeatedly and further
// Writes keep accumulating on top of the data already written.
//
// The SHA256 gitoid option is enabled when the configured hash is
// crypto.SHA256; for any other hash the gitoid library's default is used.
//
// hash.Hash gives Sum no way to report an error, so if the gitoid cannot be
// calculated b is returned unchanged.
func (gh *gitoidHasher) Sum(b []byte) []byte {
	var opts []gitoid.Option
	if gh.hash == crypto.SHA256 {
		opts = append(opts, gitoid.WithSha256())
	}

	// Passing gh.buf directly would let gitoid.New consume the buffer, leaving
	// the hasher empty after the first Sum. A reader over the buffer's bytes
	// leaves the buffered data untouched.
	g, err := gitoid.New(bytes.NewReader(gh.buf.Bytes()), opts...)
	if err != nil {
		return b
	}

	return append(b, g.URI()...)
}

// Reset returns the hasher to its initial state by replacing the internal
// buffer with a new, empty one. The configured hash function is kept.
func (gh *gitoidHasher) Reset() {
	gh.buf = new(bytes.Buffer)
}

// Size returns the number of bytes Sum will append: the length of the gitoid
// URI for the configured hash. It returns 0 if HashToString reports an error
// for that hash.
func (gh *gitoidHasher) Size() int {
	name, err := HashToString(gh.hash)
	if err != nil {
		return 0
	}

	// This is somewhat fragile and knows too much about the internals of the gitoid code:
	// it assumes the default gitoid content type will remain BLOB, that our string
	// representations of hash functions stay consistent with the gitoid library's, and
	// that the URI format will not change.
	// This should probably be changed, and this entire thing could maybe be upstreamed
	// to the gitoid library.
	uriPrefix := fmt.Sprintf("gitoid:%s:%s:", gitoid.BLOB, name)
	hexDigestLen := hex.EncodedLen(gh.hash.Size())

	return len(uriPrefix) + hexDigestLen
}

// BlockSize returns the block size of the configured hash function, obtained
// from a freshly constructed instance of that hash.
// The Write method accepts any amount of data; the block size is only a hint
// for callers that want to write in multiples of it.
func (gh *gitoidHasher) BlockSize() int {
	return gh.hash.New().BlockSize()
}

0 comments on commit f91a79c

Please sign in to comment.