From 3f8c6016e64acb869eddd3e29177a2a345ece969 Mon Sep 17 00:00:00 2001 From: Xynnn007 Date: Wed, 11 Dec 2024 15:57:18 +0800 Subject: [PATCH] image-rs: fix duplicated image layer detective logic Before this commit, when we pull images who have two encrypted layers whose corresponding plaintext layers are same like `quay.io/fidencio/prueba:encrypted`, ther will be an error like ``` index out of bounds: the len is 25 but the index is 25 ``` This is caused by the deduplication logic inside image pull logic. On one hand, it will delete duplicated layers recorded inside image manifest, who reflectes the encrypted layers/blobs. On the other hand, it will delete duplicated layers recorded inside the config.json, who reflects the plaintext of the layers. The image encryption logic will generate a random symmetric key for each layer. Thus even the same plaintext layer would be encrypted into different blobs/layers. Thus after deduplication, we might have more layers for image manifest. This patch changes the deduplicating logic, by only check the layer digests inside image manifest, s.t. even if there are two same plaintext layers, we will pull and decrypt both of them. It's ok to do some optimization later if a fully analyzation is token. Fies #840 Signed-off-by: Xynnn007 --- image-rs/src/image.rs | 33 +++++++++++++++++---------------- image-rs/src/pull.rs | 6 ++++++ 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/image-rs/src/image.rs b/image-rs/src/image.rs index 8c43eb0ac..84494fd76 100644 --- a/image-rs/src/image.rs +++ b/image-rs/src/image.rs @@ -422,7 +422,7 @@ fn create_image_meta( id: id.to_string(), digest: image_digest.to_string(), reference: image_url.to_string(), - image_config: ImageConfiguration::from_reader(image_config.to_string().as_bytes())?, + image_config: ImageConfiguration::from_reader(image_config.as_bytes())?, ..Default::default() }; @@ -431,26 +431,27 @@ fn create_image_meta( bail!("Pulled number of layers mismatch with image config diff_ids"); } + // Note that an image's `diff_ids` may always refer to plaintext layer + // digests. For two encryption layers encrypted from a same plaintext + // layer, the `LayersData.Digest` of the image manifest might be different + // because the symmetric key to encrypt is different, thus the cipher text + // is different. Interestingly in such case the `diff_ids` of the both + // layers are the same in the config.json. + // Another note is that the order of layers in the image config and the + // image manifest will always be the same, so it is safe to use a same + // index to lookup or mark a layer. let mut unique_layers = Vec::new(); - let mut digests = BTreeSet::new(); - for l in &image_manifest.layers { - if digests.contains(&l.digest) { - continue; - } - - digests.insert(&l.digest); - unique_layers.push(l.clone()); - } - let mut unique_diff_ids = Vec::new(); - let mut id_tree = BTreeSet::new(); - for id in diff_ids { - if id_tree.contains(id.as_str()) { + + let mut digests = BTreeSet::new(); + for i in 0..diff_ids.len() { + if digests.contains(&image_manifest.layers[i].digest) { continue; } - id_tree.insert(id.as_str()); - unique_diff_ids.push(id.clone()); + digests.insert(&image_manifest.layers[i].digest); + unique_layers.push(image_manifest.layers[i].clone()); + unique_diff_ids.push(diff_ids[i].clone()); } Ok((image_data, unique_layers, unique_diff_ids)) diff --git a/image-rs/src/pull.rs b/image-rs/src/pull.rs index 56bd16e62..d6e04d372 100644 --- a/image-rs/src/pull.rs +++ b/image-rs/src/pull.rs @@ -162,6 +162,10 @@ impl<'a> PullClient<'a> { }; let decryptor = Decryptor::from_media_type(&layer.media_type); + + // There are two types of layers: + // 1. Comporessed layer = Compress(Layer Data) + // 2. Encrypted+Compressed layer = Compress(Encrypt(Layer Data)) if decryptor.is_encrypted() { let decrypt_key = tokio::task::spawn_blocking({ let decryptor = decryptor.clone(); @@ -210,6 +214,8 @@ impl<'a> PullClient<'a> { Ok(layer_meta) } + /// Decompress and unpack layer data. The returned value is the + /// digest of the uncompressed layer. async fn async_decompress_unpack_layer( &self, input_reader: (impl tokio::io::AsyncRead + Unpin + Send),