From 5b75d089083e22f4f39018957a854ac11e7055c4 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Wed, 22 May 2024 14:51:08 -0700 Subject: [PATCH 1/5] Migrate `serialize_v0` to new API. This is the middle layer of the API design work (#172). We add a manifest abstract class to represent various manifests (#111 #112) and also ways to serialize a model directory into manifests and ways to verify the manifests. For now, this only does what was formerly known as `serialize_v0`. The v1 and the manifest versions will come soon. Note: This has a lot of inspiration from #112, but makes the API work with all the usecases we need to consider right now. Signed-off-by: Mihai Maruseac --- model_signing/hashing/file.py | 5 +- model_signing/manifest/__init__.py | 13 ++ model_signing/manifest/manifest.py | 39 ++++ model_signing/serializing/__init__.py | 13 ++ model_signing/serializing/dfs.py | 106 +++++++++ model_signing/serializing/dfs_test.py | 282 +++++++++++++++++++++++ model_signing/serializing/serializing.py | 33 +++ 7 files changed, 488 insertions(+), 3 deletions(-) create mode 100644 model_signing/manifest/__init__.py create mode 100644 model_signing/manifest/manifest.py create mode 100644 model_signing/serializing/__init__.py create mode 100644 model_signing/serializing/dfs.py create mode 100644 model_signing/serializing/dfs_test.py create mode 100644 model_signing/serializing/serializing.py diff --git a/model_signing/hashing/file.py b/model_signing/hashing/file.py index a6e94ec0..ef88407d 100644 --- a/model_signing/hashing/file.py +++ b/model_signing/hashing/file.py @@ -28,7 +28,7 @@ ```python >>> with open("/tmp/file", "w") as f: ... 
f.write("0123abcd") ->>> hasher = ShardedFileHasher("/tmo/file", SHA256(), start=4, end=8) +>>> hasher = ShardedFileHasher("/tmp/file", SHA256(), start=4, end=8) >>> digest = hasher.compute() >>> digest.digest_hex '88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589' @@ -144,8 +144,7 @@ def __init__( Args: file: The file to hash. Use `set_file` to reset it. content_hasher: A `hashing.HashEngine` instance used to compute the - digest of the file. This instance must not be used outside of this - instance. However, it may be pre-initialized with a header. + digest of the file. start: The file offset to start reading from. Must be valid. Reset with `set_shard`. end: The file offset to start reading from. Must be stricly greater diff --git a/model_signing/manifest/__init__.py b/model_signing/manifest/__init__.py new file mode 100644 index 00000000..0888a055 --- /dev/null +++ b/model_signing/manifest/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2024 The Sigstore Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/model_signing/manifest/manifest.py b/model_signing/manifest/manifest.py new file mode 100644 index 00000000..29cbc0d8 --- /dev/null +++ b/model_signing/manifest/manifest.py @@ -0,0 +1,39 @@ +# Copyright 2024 The Sigstore Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Machinery for representing a serialized representation of an ML model. + +Currently, we only support a manifest that wraps around a digest. But, to +support incremental updates and partial signature verification, we need a +manifest that lists files and their digests. That will come in a future change, +soon. +""" + +from abc import ABCMeta +from dataclasses import dataclass + +from model_signing.hashing import hashing + + +class Manifest(metaclass=ABCMeta): + """Generic manifest file to represent a model.""" + + pass + + +@dataclass +class DigestManifest(Manifest): + """A manifest that is just a hash.""" + + digest: hashing.Digest diff --git a/model_signing/serializing/__init__.py b/model_signing/serializing/__init__.py new file mode 100644 index 00000000..0888a055 --- /dev/null +++ b/model_signing/serializing/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2024 The Sigstore Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/model_signing/serializing/dfs.py b/model_signing/serializing/dfs.py new file mode 100644 index 00000000..08d9a78e --- /dev/null +++ b/model_signing/serializing/dfs.py @@ -0,0 +1,106 @@ +# Copyright 2024 The Sigstore Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Model serializers that build a single hash out of a DFS traversal.""" + +import pathlib +from typing import Callable +from typing_extensions import override + +from model_signing.hashing import file +from model_signing.hashing import hashing +from model_signing.manifest import manifest +from model_signing.serializing import serializing + + +def _check_file_or_directory(path: pathlib.Path) -> bool: + """Checks that the given path is either a file or a directory.""" + return path.is_file() or path.is_dir() + + +def _build_header(*, entry_name: str, entry_type: str) -> bytes: + """Builds a header to encode a path with given name and type. + + Args: + entry_name: The name of the entry to build the header for. + entry_type: The type of the entry (file or directory). + """ + encoded_type = entry_type.encode("utf-8") + encoded_name = entry_name.encode("utf-8") + return b".".join([encoded_type, encoded_name, b""]) + + +class DFSSerializer(serializing.Serializer): + """Serializer for a model that performs a traversal of the model directory. + + This serializer produces a single hash for the entire model. If the model is + a file, the hash is the digest of the file. 
If the model is a directory, we + perform a depth-first traversal of the directory, hash each individual file + and aggregate the hashes together. + """ + + def __init__( + self, + file_hasher: file.FileHasher, + merge_hasher_factory: Callable[[], hashing.StreamingHashEngine], + ): + """Initializes an instance to hash a file with a specific `HashEngine`. + + Args: + file_hasher: The hash engine used to hash the individual files. + merge_hasher_factory: A callable that returns a + `hashing.StreamingHashEngine` instance used to merge individual + file digests to compute an aggregate digest. + """ + self._file_hasher = file_hasher + self._merge_hasher_factory = merge_hasher_factory + + @override + def serialize(self, model_path: pathlib.Path) -> manifest.Manifest: + # TODO(mihaimaruseac): Add checks for symlinks + if not _check_file_or_directory(model_path): + raise ValueError( + f"Must have a file or directory, but '{model_path}' is neither." + ) + + if model_path.is_file(): + self._file_hasher.set_file(model_path) + return manifest.DigestManifest(self._file_hasher.compute()) + + return manifest.DigestManifest(self._dfs(model_path)) + + def _dfs(self, directory: pathlib.Path) -> hashing.Digest: + # TODO(mihaimaruseac): Add support for excluded files + children = sorted([x for x in directory.iterdir()]) + + hasher = self._merge_hasher_factory() + for child in children: + if not _check_file_or_directory(child): + raise ValueError( + f"Must have a file or directory, but '{child}' is neither."
+ ) + + if child.is_file(): + header = _build_header(entry_name=child.name, entry_type="file") + hasher.update(header) + self._file_hasher.set_file(child) + digest = self._file_hasher.compute() + hasher.update(digest.digest_value) + else: + header = _build_header(entry_name=child.name, entry_type="dir") + hasher.update(header) + digest = self._dfs(child) + hasher.update(digest.digest_value) + + return hasher.compute() diff --git a/model_signing/serializing/dfs_test.py b/model_signing/serializing/dfs_test.py new file mode 100644 index 00000000..3525bad5 --- /dev/null +++ b/model_signing/serializing/dfs_test.py @@ -0,0 +1,282 @@ +# Copyright 2024 The Sigstore Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from model_signing.hashing import file +from model_signing.hashing import memory +from model_signing.serializing import dfs + + +# some constants used throughout testing +_KNOWN_MODEL_TEXT: bytes = b"This is a simple model" +_ANOTHER_MODEL_TEXT: bytes = b"This is another simple model" + + +# Note: Don't make fixtures with global scope as we are altering the models! 
+@pytest.fixture +def sample_model_file(tmp_path_factory): + file = tmp_path_factory.mktemp("model") / "file" + file.write_bytes(_KNOWN_MODEL_TEXT) + return file + + +@pytest.fixture +def empty_model_file(tmp_path_factory): + file = tmp_path_factory.mktemp("model") / "file" + file.write_bytes(b"") + return file + + +@pytest.fixture +def sample_model_folder(tmp_path_factory): + model_root = tmp_path_factory.mktemp("model") / "root" + model_root.mkdir() + + for i in range(2): + root_dir = model_root / f"d{i}" + root_dir.mkdir() + for j in range(3): + dir_file = root_dir / f"f{i}{j}" + dir_file.write_text(f"This is file f{i}{j} in d{i}.") + + for i in range(4): + root_file = model_root / f"f{i}" + root_file.write_text(f"This is file f{i} in root.") + + return model_root + + +@pytest.fixture +def empty_model_folder(tmp_path_factory): + model_root = tmp_path_factory.mktemp("model") / "root" + model_root.mkdir() + return model_root + + +@pytest.fixture +def deep_model_folder(tmp_path_factory): + model_root = tmp_path_factory.mktemp("model") / "root" + model_root.mkdir() + + current = model_root + for i in range(5): + current = current / f"d{i}" + current.mkdir() + + for i in range(4): + file = current / f"f{i}" + file.write_text(f"This is file f{i}.") + + return model_root + + +class TestDFSSerializer: + + def test_known_file(self, sample_model_file): + file_hasher = file.FileHasher("unused", memory.SHA256()) + serializer = dfs.DFSSerializer(file_hasher, memory.SHA256) + manifest = serializer.serialize(sample_model_file) + expected = ( + "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b" + ) + assert manifest.digest.digest_hex == expected + + def test_file_hash_is_same_as_hash_of_content(self, sample_model_file): + file_hasher = file.FileHasher("unused", memory.SHA256()) + serializer = dfs.DFSSerializer(file_hasher, memory.SHA256) + manifest = serializer.serialize(sample_model_file) + digest = memory.SHA256(_KNOWN_MODEL_TEXT).compute() + assert 
manifest.digest.digest_hex == digest.digest_hex + + def test_file_model_hash_is_same_if_model_is_moved(self, sample_model_file): + file_hasher = file.FileHasher("unused", memory.SHA256()) + serializer = dfs.DFSSerializer(file_hasher, memory.SHA256) + manifest = serializer.serialize(sample_model_file) + + new_name = sample_model_file.with_name("new-file") + new_file = sample_model_file.rename(new_name) + new_manifest = serializer.serialize(new_file) + + assert manifest == new_manifest + + def test_file_model_hash_changes_if_content_changes( + self, sample_model_file + ): + file_hasher = file.FileHasher("unused", memory.SHA256()) + serializer = dfs.DFSSerializer(file_hasher, memory.SHA256) + manifest = serializer.serialize(sample_model_file) + + sample_model_file.write_bytes(_ANOTHER_MODEL_TEXT) + new_manifest = serializer.serialize(sample_model_file) + + assert manifest.digest.algorithm == new_manifest.digest.algorithm + assert manifest.digest.digest_value != new_manifest.digest.digest_value + + def test_directory_model_with_only_known_file(self, sample_model_file): + file_hasher = file.FileHasher("unused", memory.SHA256()) + serializer = dfs.DFSSerializer(file_hasher, memory.SHA256) + + model = sample_model_file.parent + manifest = serializer.serialize(model) + + expected = ( + "aa856f565699473579c8d7009bfad8c421e1643b810f0a28d47b9ce1f0b98ccc" + ) + assert manifest.digest.digest_hex == expected + + digest = memory.SHA256(_KNOWN_MODEL_TEXT).compute() + assert manifest.digest.digest_hex != digest.digest_hex + + def test_known_folder(self, sample_model_folder): + file_hasher = file.FileHasher("unused", memory.SHA256()) + serializer = dfs.DFSSerializer(file_hasher, memory.SHA256) + manifest = serializer.serialize(sample_model_folder) + expected = ( + "516de24dd65c9749bbde333545cb997c645e21c510107fa5c06428e0df84099b" + ) + assert manifest.digest.digest_hex == expected + + def test_folder_model_hash_is_same_if_model_is_moved( + self, sample_model_folder + ): + 
file_hasher = file.FileHasher("unused", memory.SHA256()) + serializer = dfs.DFSSerializer(file_hasher, memory.SHA256) + manifest = serializer.serialize(sample_model_folder) + + new_name = sample_model_folder.with_name("new-root") + new_model = sample_model_folder.rename(new_name) + new_manifest = serializer.serialize(new_model) + + assert manifest == new_manifest + + def test_empty_file(self, empty_model_file): + file_hasher = file.FileHasher("unused", memory.SHA256()) + serializer = dfs.DFSSerializer(file_hasher, memory.SHA256) + manifest = serializer.serialize(empty_model_file) + expected = ( + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + ) + assert manifest.digest.digest_hex == expected + + def test_directory_model_with_only_empty_file(self, empty_model_file): + file_hasher = file.FileHasher("unused", memory.SHA256()) + serializer = dfs.DFSSerializer(file_hasher, memory.SHA256) + manifest = serializer.serialize(empty_model_file) + model = empty_model_file.parent + manifest = serializer.serialize(model) + expected = ( + "ca671f6b24ce1b08677759ed050a30eb86a28c18abfa2308c7da9e581a8f7917" + ) + assert manifest.digest.digest_hex == expected + + def test_empty_folder(self, empty_model_folder): + file_hasher = file.FileHasher("unused", memory.SHA256()) + serializer = dfs.DFSSerializer(file_hasher, memory.SHA256) + manifest = serializer.serialize(empty_model_folder) + expected = ( + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + ) + assert manifest.digest.digest_hex == expected + + def test_empty_folder_hashes_the_same_as_empty_file( + self, empty_model_file, empty_model_folder + ): + file_hasher = file.FileHasher("unused", memory.SHA256()) + serializer = dfs.DFSSerializer(file_hasher, memory.SHA256) + folder_manifest = serializer.serialize(empty_model_folder) + file_manifest = serializer.serialize(empty_model_file) + assert ( + folder_manifest.digest.digest_hex == file_manifest.digest.digest_hex + ) + + def 
test_folder_model_empty_entry(self, sample_model_folder): + file_hasher = file.FileHasher("unused", memory.SHA256()) + serializer = dfs.DFSSerializer(file_hasher, memory.SHA256) + + # Alter first directory within the model + dirs = [d for d in sample_model_folder.iterdir() if d.is_dir()] + altered_dir = dirs[0] + + new_empty_dir = altered_dir / "empty" + new_empty_dir.mkdir() + manifest1 = serializer.serialize(sample_model_folder) + + new_empty_dir.rmdir() + + new_empty_file = altered_dir / "empty" + new_empty_file.write_text("") + manifest2 = serializer.serialize(sample_model_folder) + + assert manifest1.digest != manifest2.digest + + def test_folder_model_rename_file(self, sample_model_folder): + file_hasher = file.FileHasher("unused", memory.SHA256()) + serializer = dfs.DFSSerializer(file_hasher, memory.SHA256) + manifest1 = serializer.serialize(sample_model_folder) + + # Alter first directory within the model + dirs = [d for d in sample_model_folder.iterdir() if d.is_dir()] + altered_dir = dirs[0] + + # Alter first file in the altered_dir + files = [f for f in altered_dir.iterdir() if f.is_file()] + file_to_rename = files[0] + + new_name = file_to_rename.with_name("new-file") + file_to_rename.rename(new_name) + + manifest2 = serializer.serialize(sample_model_folder) + assert manifest1.digest != manifest2.digest + + def test_folder_model_rename_dir(self, sample_model_folder): + file_hasher = file.FileHasher("unused", memory.SHA256()) + serializer = dfs.DFSSerializer(file_hasher, memory.SHA256) + manifest1 = serializer.serialize(sample_model_folder) + + # Alter first directory within the model + dirs = [d for d in sample_model_folder.iterdir() if d.is_dir()] + dir_to_rename = dirs[0] + + new_name = dir_to_rename.with_name("new-dir") + dir_to_rename.rename(new_name) + + manifest2 = serializer.serialize(sample_model_folder) + assert manifest1.digest != manifest2.digest + + def test_folder_model_change_file(self, sample_model_folder): + file_hasher = 
file.FileHasher("unused", memory.SHA256()) + serializer = dfs.DFSSerializer(file_hasher, memory.SHA256) + manifest1 = serializer.serialize(sample_model_folder) + + # Alter first directory within the model + dirs = [d for d in sample_model_folder.iterdir() if d.is_dir()] + altered_dir = dirs[0] + + # Alter first file in the altered_dir + files = [f for f in altered_dir.iterdir() if f.is_file()] + file_to_change = files[0] + file_to_change.write_bytes(_KNOWN_MODEL_TEXT) + + manifest2 = serializer.serialize(sample_model_folder) + assert manifest1.digest != manifest2.digest + + def test_deep_folder(self, deep_model_folder): + file_hasher = file.FileHasher("unused", memory.SHA256()) + serializer = dfs.DFSSerializer(file_hasher, memory.SHA256) + manifest = serializer.serialize(deep_model_folder) + expected = ( + "1ae1b8a653dba20787ae8482611761ee7f1223b15fbfbaa1fce5c55751048d62" + ) + assert manifest.digest.digest_hex == expected diff --git a/model_signing/serializing/serializing.py b/model_signing/serializing/serializing.py new file mode 100644 index 00000000..50c8f729 --- /dev/null +++ b/model_signing/serializing/serializing.py @@ -0,0 +1,33 @@ +# Copyright 2024 The Sigstore Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Machinery for serializing ML models. + +Currently we have only one serializer that performs a DFS traversal of the model +directory, but more serializers are coming soon. 
+""" + +from abc import ABCMeta, abstractmethod +import pathlib + +from model_signing.manifest import manifest + + +class Serializer(metaclass=ABCMeta): + """Generic ML model format serializer.""" + + @abstractmethod + def serialize(self, model_path: pathlib.Path) -> manifest.Manifest: + """Serializes the model given by the `model_path` argument.""" + pass From b1be32544ed66058bc0f3e1f26ac425b08b6972c Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Tue, 4 Jun 2024 07:48:43 -0700 Subject: [PATCH 2/5] Clarify some comments Signed-off-by: Mihai Maruseac --- model_signing/serializing/dfs.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/model_signing/serializing/dfs.py b/model_signing/serializing/dfs.py index 08d9a78e..7f1d956f 100644 --- a/model_signing/serializing/dfs.py +++ b/model_signing/serializing/dfs.py @@ -25,7 +25,14 @@ def _check_file_or_directory(path: pathlib.Path) -> bool: - """Checks that the given path is either a file or a directory.""" + """Checks that the given path is either a file or a directory. + + There is no support for sockets, pipes, or any other operating system + concept abstracted as a file. + + Furthermore, this would return False if the path is a broken symlink, if it + doesn't exists or if there are permission errors. + """ return path.is_file() or path.is_dir() @@ -38,6 +45,7 @@ def _build_header(*, entry_name: str, entry_type: str) -> bytes: """ encoded_type = entry_type.encode("utf-8") encoded_name = entry_name.encode("utf-8") + # Note: make sure to end with a ".". return b".".join([encoded_type, encoded_name, b""]) @@ -68,10 +76,12 @@ def __init__( @override def serialize(self, model_path: pathlib.Path) -> manifest.Manifest: - # TODO(mihaimaruseac): Add checks for symlinks + # TODO(mihaimaruseac): Add checks to exclude symlinks if desired if not _check_file_or_directory(model_path): raise ValueError( - f"Must have a file or directory, but '{model_path}' is neither." 
+ f"Cannot use '{model_path}' as file or directory. It could be a" + " special file, it could be missing, or there might be a" + " permission issue." ) if model_path.is_file(): @@ -88,7 +98,9 @@ def _dfs(self, directory: pathlib.Path) -> hashing.Digest: for child in children: if not _check_file_or_directory(child): raise ValueError( - f"Must have a file or directory, but '{child}' is neither." + f"Cannot use '{child}' as file or directory. It could be a" + " special file, it could be missing, or there might be a" + " permission issue." ) if child.is_file(): From ebb37b4929bb46f378b2b3a0e19b7c68c20b8669 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Tue, 4 Jun 2024 11:41:59 -0700 Subject: [PATCH 3/5] Encode name with base64 Signed-off-by: Mihai Maruseac --- model_signing/serializing/dfs.py | 4 +++- model_signing/serializing/dfs_test.py | 8 ++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/model_signing/serializing/dfs.py b/model_signing/serializing/dfs.py index 7f1d956f..5d42c4c1 100644 --- a/model_signing/serializing/dfs.py +++ b/model_signing/serializing/dfs.py @@ -14,6 +14,7 @@ """Model serializers that build a single hash out of a DFS traversal.""" +import base64 import pathlib from typing import Callable from typing_extensions import override @@ -44,7 +45,8 @@ def _build_header(*, entry_name: str, entry_type: str) -> bytes: entry_type: The type of the entry (file or directory). """ encoded_type = entry_type.encode("utf-8") - encoded_name = entry_name.encode("utf-8") + # Prevent confusion if name has a "." inside by encoding to base64. + encoded_name = base64.b64encode(entry_name.encode("utf-8")) # Note: make sure to end with a ".". 
return b".".join([encoded_type, encoded_name, b""]) diff --git a/model_signing/serializing/dfs_test.py b/model_signing/serializing/dfs_test.py index 3525bad5..aae916d6 100644 --- a/model_signing/serializing/dfs_test.py +++ b/model_signing/serializing/dfs_test.py @@ -132,7 +132,7 @@ def test_directory_model_with_only_known_file(self, sample_model_file): manifest = serializer.serialize(model) expected = ( - "aa856f565699473579c8d7009bfad8c421e1643b810f0a28d47b9ce1f0b98ccc" + "a0865eb7e299e3bca3951e24930c56dcf1533ecff63bda06a9be67906773c628" ) assert manifest.digest.digest_hex == expected @@ -144,7 +144,7 @@ def test_known_folder(self, sample_model_folder): serializer = dfs.DFSSerializer(file_hasher, memory.SHA256) manifest = serializer.serialize(sample_model_folder) expected = ( - "516de24dd65c9749bbde333545cb997c645e21c510107fa5c06428e0df84099b" + "310af4fc4c52bf63cd1687c67076ed3e56bc5480a1b151539e6c550506ae0301" ) assert manifest.digest.digest_hex == expected @@ -177,7 +177,7 @@ def test_directory_model_with_only_empty_file(self, empty_model_file): model = empty_model_file.parent manifest = serializer.serialize(model) expected = ( - "ca671f6b24ce1b08677759ed050a30eb86a28c18abfa2308c7da9e581a8f7917" + "8a587b2129fdecfbea38d5152b626299f5994d9b99d36b321aea356f69b38c61" ) assert manifest.digest.digest_hex == expected @@ -277,6 +277,6 @@ def test_deep_folder(self, deep_model_folder): serializer = dfs.DFSSerializer(file_hasher, memory.SHA256) manifest = serializer.serialize(deep_model_folder) expected = ( - "1ae1b8a653dba20787ae8482611761ee7f1223b15fbfbaa1fce5c55751048d62" + "36eed9389ebbbe15ac15d33c81dabb60ccb7c945ff641d78f59db9aa9dc47ac9" ) assert manifest.digest.digest_hex == expected From 28265134d5effb3afdb9650867680ce8db965567 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Tue, 4 Jun 2024 11:53:20 -0700 Subject: [PATCH 4/5] Add another test case Signed-off-by: Mihai Maruseac --- model_signing/serializing/dfs_test.py | 18 ++++++++++++++++++ 1 file changed, 18 
insertions(+) diff --git a/model_signing/serializing/dfs_test.py b/model_signing/serializing/dfs_test.py index aae916d6..8ff67c20 100644 --- a/model_signing/serializing/dfs_test.py +++ b/model_signing/serializing/dfs_test.py @@ -255,6 +255,24 @@ def test_folder_model_rename_dir(self, sample_model_folder): manifest2 = serializer.serialize(sample_model_folder) assert manifest1.digest != manifest2.digest + def test_folder_model_replace_file_empty_folder(self, sample_model_folder): + file_hasher = file.FileHasher("unused", memory.SHA256()) + serializer = dfs.DFSSerializer(file_hasher, memory.SHA256) + manifest1 = serializer.serialize(sample_model_folder) + + # Alter first directory within the model + dirs = [d for d in sample_model_folder.iterdir() if d.is_dir()] + altered_dir = dirs[0] + + # Replace first file in the altered_dir + files = [f for f in altered_dir.iterdir() if f.is_file()] + file_to_replace = files[0] + file_to_replace.unlink() + file_to_replace.mkdir() + + manifest2 = serializer.serialize(sample_model_folder) + assert manifest1.digest != manifest2.digest + def test_folder_model_change_file(self, sample_model_folder): file_hasher = file.FileHasher("unused", memory.SHA256()) serializer = dfs.DFSSerializer(file_hasher, memory.SHA256) From d1c726154ee81e74e8167f734d1c1bf255ca5932 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Wed, 5 Jun 2024 09:43:50 -0700 Subject: [PATCH 5/5] Empty commit to retrigger DCO check. See https://github.com/dcoapp/app/issues/211#issuecomment-2150495994 Signed-off-by: Mihai Maruseac