diff --git a/Cargo.lock b/Cargo.lock index 54853440c8e9d..81866093cfeae 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -153,6 +153,22 @@ dependencies = [ "tempfile", ] +[[package]] +name = "astral-tokio-tar" +version = "0.4.2" +source = "git+https://github.com/astral-sh/tokio-tar?branch=charlie%2Fmemo#3aba8cf91d9fa5eb9f9911474117185c48573066" +dependencies = [ + "filetime", + "futures-core", + "libc", + "portable-atomic", + "redox_syscall 0.3.5", + "rustc-hash", + "tokio", + "tokio-stream", + "xattr", +] + [[package]] name = "async-channel" version = "2.3.1" @@ -1979,22 +1995,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "krata-tokio-tar" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8bd5fee9b96acb5fc36b401896d601e6fdcce52b0e651ce24a3b21fb524e79f" -dependencies = [ - "filetime", - "futures-core", - "libc", - "portable-atomic", - "redox_syscall 0.3.5", - "tokio", - "tokio-stream", - "xattr", -] - [[package]] name = "kurbo" version = "0.8.3" @@ -5060,11 +5060,11 @@ dependencies = [ name = "uv-extract" version = "0.0.1" dependencies = [ + "astral-tokio-tar", "async-compression", "async_zip", "fs-err 3.1.0", "futures", - "krata-tokio-tar", "md-5", "rayon", "reqwest", @@ -5344,6 +5344,7 @@ dependencies = [ name = "uv-publish" version = "0.1.0" dependencies = [ + "astral-tokio-tar", "async-compression", "base64 0.22.1", "fs-err 3.1.0", @@ -5351,7 +5352,6 @@ dependencies = [ "glob", "insta", "itertools 0.14.0", - "krata-tokio-tar", "reqwest", "reqwest-middleware", "reqwest-retry", diff --git a/Cargo.toml b/Cargo.toml index 8aa58cad9fd1f..52cd54cf73f51 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -73,6 +73,7 @@ uv-workspace = { path = "crates/uv-workspace" } anstream = { version = "0.6.15" } anyhow = { version = "1.0.89" } arcstr = { version = "1.2.0" } +astral-tokio-tar = { git = "https://github.com/astral-sh/tokio-tar", branch = "charlie/memo" } async-channel = { version = "2.3.1" } async-compression = { version = "0.4.12", features = ["bzip2", "gzip", "xz", "zstd"] } async-trait = { version = "0.1.82" } @@ -118,7 +119,6 @@ indoc = { version = "2.0.5" } itertools = { version = "0.14.0" } jiff = { version = "0.1.14", features = ["serde"] } junction = { version = "1.2.0" } -krata-tokio-tar = { version = "0.4.2" } mailparse = { version = "0.15.0" } md-5 = { version = "0.10.6" } memchr = { version = "2.7.4" } diff --git a/crates/uv-extract/Cargo.toml b/crates/uv-extract/Cargo.toml index 7f3851f46fe73..fc6c3343bf5ca 100644 --- a/crates/uv-extract/Cargo.toml +++ b/crates/uv-extract/Cargo.toml @@ -20,11 +20,11 @@ uv-configuration = { workspace = true } uv-distribution-filename = { workspace = true } uv-pypi-types = { workspace = true } +astral-tokio-tar = { workspace = true } async-compression = { workspace = true, features = ["bzip2", "gzip", "zstd", "xz"] } async_zip = { workspace = true } fs-err = { workspace = true, features = ["tokio"] } futures = { workspace = true } -krata-tokio-tar = { workspace = true } md-5 = { workspace = true } rayon = { workspace = true } reqwest = { workspace = true } diff --git a/crates/uv-extract/src/stream.rs b/crates/uv-extract/src/stream.rs index 49ae138c451db..3851447daad86 100644 --- a/crates/uv-extract/src/stream.rs +++ b/crates/uv-extract/src/stream.rs @@ -3,6 +3,7 @@ use std::pin::Pin; use futures::StreamExt; use rustc_hash::FxHashSet; +use tokio_tar::EntryType; use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt}; use tracing::warn; @@ -143,6 +144,17 @@ async fn untar_in( mut archive: tokio_tar::Archive<&'_ mut (dyn tokio::io::AsyncRead + Unpin)>, dst: &Path, ) -> std::io::Result<()> { + // Like `tokio-tar`, canonicalize the destination prior to unpacking. + let dst = fs_err::tokio::canonicalize(dst).await?; + + // Memoize filesystem calls to canonicalize paths. + let mut memo = FxHashSet::default(); + + // Delay any directory entries until the end (they will be created if needed by + // descendants), to ensure that directory permissions do not interfere with descendant + // extraction. + let mut directories = Vec::new(); + let mut entries = archive.entries()?; let mut pinned = Pin::new(&mut entries); while let Some(entry) = pinned.next().await { @@ -159,7 +171,12 @@ async fn untar_in( continue; } - file.unpack_in(dst).await?; + if file.header().entry_type() == EntryType::Directory { + directories.push(file); + continue; + } + + file.unpack_in_memo(&dst, &mut memo).await?; // Preserve the executable bit. #[cfg(unix)] @@ -172,7 +189,7 @@ async fn untar_in( let mode = file.header().mode()?; let has_any_executable_bit = mode & 0o111; if has_any_executable_bit != 0 { - if let Some(path) = crate::tar::unpacked_at(dst, &file.path()?) { + if let Some(path) = crate::tar::unpacked_at(&dst, &file.path()?) { let permissions = fs_err::tokio::metadata(&path).await?.permissions(); if permissions.mode() & 0o111 != 0o111 { fs_err::tokio::set_permissions( @@ -186,6 +203,12 @@ async fn untar_in( } } } + + // Create any deferred directories. + for mut dir in directories { + dir.unpack_in_memo(&dst, &mut memo).await?; + } + Ok(()) } diff --git a/crates/uv-publish/Cargo.toml b/crates/uv-publish/Cargo.toml index 06f4fb9401eb9..be2a627b6f6bd 100644 --- a/crates/uv-publish/Cargo.toml +++ b/crates/uv-publish/Cargo.toml @@ -25,13 +25,13 @@ uv-pypi-types = { workspace = true } uv-static = { workspace = true } uv-warnings = { workspace = true } +astral-tokio-tar = { workspace = true } async-compression = { workspace = true } base64 = { workspace = true } fs-err = { workspace = true } futures = { workspace = true } glob = { workspace = true } itertools = { workspace = true } -krata-tokio-tar = { workspace = true } reqwest = { workspace = true } reqwest-middleware = { workspace = true } reqwest-retry = { workspace = true }