Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add in-toto format with hashes of files as subjects #266

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 129 additions & 0 deletions model_signing/signing/in_toto.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,3 +352,132 @@ def from_manifest(cls, manifest: manifest_module.Manifest) -> Self:
predicate_top_level_name="shards",
)
return cls(statement)


def _convert_descriptors_to_direct_statement(
    manifest: manifest_module.Manifest, predicate_type: str
):
    """Builds an in-toto statement whose subjects are the manifest descriptors.

    Every resource descriptor yielded by the manifest becomes one subject. The
    digest is always keyed as "sha256", while the algorithm reported by the
    descriptor's digest is recorded in the "actual_hash_algorithm" annotation.

    Args:
        manifest: The manifest to extract the descriptors from. Assumed valid.
        predicate_type: The predicate_type to use in the in-toto statement.

    Returns:
        The statement built from the subjects, the given predicate type and a
        placeholder predicate.
    """
    subjects = [
        statement.ResourceDescriptor(
            name=entry.identifier,
            digest={"sha256": entry.digest.digest_hex},
            annotations={"actual_hash_algorithm": entry.digest.algorithm},
        ).pb
        for entry in manifest.resource_descriptors()
    ]

    # The predicate carries no information; it is passed only because the API
    # requires one. See https://github.com/in-toto/attestation/issues/374
    placeholder_predicate = {
        "unused": "Unused, just passed due to API requirements"
    }

    return statement.Statement(
        subjects=subjects,
        predicate_type=predicate_type,
        predicate=placeholder_predicate,
    )


class DigestsIntotoPayload(IntotoPayload):
    """In-toto payload where the subjects are the model files themselves.

    This payload is supposed to be used for manifests where every file in the
    model is matched with a digest. Because existing tooling only supports
    established hashing algorithms, we annotate every subject with the actual
    hash algorithm used to compute the file digest, and use "sha256" as the
    algorithm name in the digest itself.

    Example:
    ```json
    {
      "_type": "https://in-toto.io/Statement/v1",
      "subject": [
        {
          "name": "d0/d1/d2/d3/d4/f0",
          "digest": {
            "sha256": "6efa14..."
          },
          "annotations": {
            "actual_hash_algorithm": "file-sha256"
          }
        },
        {
          "name": "d0/d1/d2/d3/d4/f1",
          "digest": {
            "sha256": "a9bc14..."
          },
          "annotations": {
            "actual_hash_algorithm": "file-sha256"
          }
        },
        {
          "name": "d0/d1/d2/d3/d4/f2",
          "digest": {
            "sha256": "5f597e..."
          },
          "annotations": {
            "actual_hash_algorithm": "file-sha256"
          }
        },
        {
          "name": "d0/d1/d2/d3/d4/f3",
          "digest": {
            "sha256": "eaf677..."
          },
          "annotations": {
            "actual_hash_algorithm": "file-sha256"
          }
        }
      ],
      "predicateType": "https://model_signing/Digests/v0.1",
      "predicate": {
        "unused": "Unused, just passed due to API requirements"
      }
    }
    ```

    If the annotation for a subject is missing, or it does not contain
    actual_hash_algorithm, it should be assumed that the digest is computed via
    the algorithm listed in the digest dictionary (i.e., sha256).

    See also https://github.com/sigstore/sigstore-python/issues/1018.
    """

    # Predicate type written into every statement produced by this class.
    predicate_type: Final[str] = "https://model_signing/Digests/v0.1"

    def __init__(self, statement: statement.Statement):
        """Builds an instance of this in-toto payload.

        Don't call this directly in production. Use `from_manifest()` instead.

        Args:
            statement: The in-toto statement representing this payload.
        """
        self.statement = statement

    @classmethod
    @override
    def from_manifest(cls, manifest: manifest_module.Manifest) -> Self:
        """Converts a manifest to the signing payload used for signing.

        The manifest must be one where every model file is paired with its own
        digest. Currently, this is only `FileLevelManifest`.

        Args:
            manifest: the manifest to convert to signing payload.

        Returns:
            An instance of `DigestsIntotoPayload`.

        Raises:
            TypeError: If the manifest is not `FileLevelManifest`.
        """
        if not isinstance(manifest, manifest_module.FileLevelManifest):
            raise TypeError("Only FileLevelManifest is supported")

        # Named so that it does not shadow the imported `statement` module.
        payload_statement = _convert_descriptors_to_direct_statement(
            manifest, predicate_type=cls.predicate_type
        )
        return cls(payload_statement)
56 changes: 56 additions & 0 deletions model_signing/signing/in_toto_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,3 +208,59 @@ def test_only_runs_on_expected_manifest_types(self):
match="Only ShardLevelManifest is supported",
):
in_toto.DigestOfShardDigestsIntotoPayload.from_manifest(manifest)


class TestDigestsIntotoPayload:
    """Tests for `in_toto.DigestsIntotoPayload`."""

    def _hasher_factory(self, path: pathlib.Path) -> file.FileHasher:
        # Each file is hashed as a whole with SHA256.
        return file.SimpleFileHasher(path, memory.SHA256())

    def _payload_for(self, model):
        # Serialize the model file by file, then wrap the resulting manifest
        # into the payload under test.
        serializer = serialize_by_file.ManifestSerializer(
            self._hasher_factory, allow_symlinks=True
        )
        manifest = serializer.serialize(model)
        return in_toto.DigestsIntotoPayload.from_manifest(manifest)

    @pytest.mark.parametrize("model_fixture_name", test_support.all_test_models)
    def test_known_models(self, request, model_fixture_name):
        # Arrange: locate the golden file and resolve the model fixture.
        golden_path = (
            request.path.parent
            / "testdata"
            / "in_toto"
            / "TestDigestsIntotoPayload"
            / model_fixture_name
        )
        should_update = request.config.getoption("update_goldens")
        model = request.getfixturevalue(model_fixture_name)

        # Act: compute the payload for the model.
        payload = self._payload_for(model)

        # When goldens are being regenerated there is nothing to compare
        # against, so write the file and stop.
        if should_update:
            serialized = json_format.MessageToJson(payload.statement.pb)
            golden_path.write_text(f"{serialized}\n", encoding="utf-8")
            return

        # Assert: the payload must match the stored golden statement.
        golden_json = golden_path.read_text(encoding="utf-8")
        expected_proto = json_format.Parse(
            golden_json, statement_pb2.Statement()
        )
        assert payload.statement.pb == expected_proto

    def test_produces_valid_statements(self, sample_model_folder):
        payload = self._payload_for(sample_model_folder)

        payload.statement.validate()

    def test_only_runs_on_expected_manifest_types(self):
        # A digest-only manifest is not a FileLevelManifest and must be
        # rejected.
        unsupported_manifest = manifest_module.DigestManifest(
            hashing.Digest("test", b"test_digest")
        )

        with pytest.raises(
            TypeError,
            match="Only FileLevelManifest is supported",
        ):
            in_toto.DigestsIntotoPayload.from_manifest(unsupported_manifest)
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"name": "d0/d1/d2/d3/d4/f0",
"digest": {
"sha256": "6efa14bb03544fcb76045c55f25b9315b6eb5be2d8a85f703193a76b7874c6ff"
},
"annotations": {
"actual_hash_algorithm": "file-sha256"
}
},
{
"name": "d0/d1/d2/d3/d4/f1",
"digest": {
"sha256": "a9bc149b70b9d325cd68d275d582cfdb98c0347d3ce54590aa6533368daed3d2"
},
"annotations": {
"actual_hash_algorithm": "file-sha256"
}
},
{
"name": "d0/d1/d2/d3/d4/f2",
"digest": {
"sha256": "5f597e6a92d1324d9adbed43d527926d11d0131487baf315e65ae1ef3b1ca3c0"
},
"annotations": {
"actual_hash_algorithm": "file-sha256"
}
},
{
"name": "d0/d1/d2/d3/d4/f3",
"digest": {
"sha256": "eaf677c35fec6b87889d9e4563d8bb65dcb9869ca0225697c9cc44cf49dca008"
},
"annotations": {
"actual_hash_algorithm": "file-sha256"
}
}
],
"predicateType": "https://model_signing/Digests/v0.1",
"predicate": {
"unused": "Unused, just passed due to API requirements"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"name": ".",
"digest": {
"sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
},
"annotations": {
"actual_hash_algorithm": "file-sha256"
}
}
],
"predicateType": "https://model_signing/Digests/v0.1",
"predicate": {
"unused": "Unused, just passed due to API requirements"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"_type": "https://in-toto.io/Statement/v1",
"predicateType": "https://model_signing/Digests/v0.1",
"predicate": {
"unused": "Unused, just passed due to API requirements"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"name": "empty_file",
"digest": {
"sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
},
"annotations": {
"actual_hash_algorithm": "file-sha256"
}
}
],
"predicateType": "https://model_signing/Digests/v0.1",
"predicate": {
"unused": "Unused, just passed due to API requirements"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"name": ".",
"digest": {
"sha256": "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b"
},
"annotations": {
"actual_hash_algorithm": "file-sha256"
}
}
],
"predicateType": "https://model_signing/Digests/v0.1",
"predicate": {
"unused": "Unused, just passed due to API requirements"
}
}
Loading
Loading