From a94276a2330628d135ff936459920cfa5df64e79 Mon Sep 17 00:00:00 2001 From: William Batista Date: Tue, 17 Oct 2023 23:38:32 -0400 Subject: [PATCH] Add compression to the client This should help overall file sizes on the server --- Cargo.lock | 119 +++++++++++++++++++++++---------------------- bfsp/Cargo.toml | 2 +- bfsp/src/crypto.rs | 5 -- bfsp/src/lib.rs | 8 ++- cli/src/main.rs | 12 +++-- flake.lock | 24 ++++----- src/main.rs | 14 +++--- 7 files changed, 95 insertions(+), 89 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8224b7c..fd97f69 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -171,11 +171,11 @@ dependencies = [ "anyhow", "blake3", "chacha20poly1305", - "num", "rkyv", "sqlx", "tokio", "uuid", + "zstd", ] [[package]] @@ -186,9 +186,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.4.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" +checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" dependencies = [ "serde", ] @@ -274,6 +274,7 @@ version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" dependencies = [ + "jobserver", "libc", ] @@ -494,7 +495,7 @@ dependencies = [ "hashbrown 0.14.1", "lock_api", "once_cell", - "parking_lot_core 0.9.8", + "parking_lot_core 0.9.9", ] [[package]] @@ -927,6 +928,15 @@ version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" +[[package]] +name = "jobserver" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d" +dependencies = [ + "libc", +] + [[package]] name = "js-sys" version = "0.3.64" @@ -976,9 +986,9 @@ checksum = "da2479e8c062e40bf0066ffa0bc823de0a9368974af99c9f6df941d2c231e03f" [[package]] name = "lock_api" -version = "0.4.10" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16" +checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" dependencies = [ "autocfg", "scopeguard", @@ -1051,31 +1061,6 @@ dependencies = [ "minimal-lexical", ] -[[package]] -name = "num" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", -] - -[[package]] -name = "num-bigint" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - [[package]] name = "num-bigint-dig" version = "0.8.4" @@ -1093,15 +1078,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "num-complex" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214" -dependencies = [ - "num-traits", -] - [[package]] name = "num-integer" version = "0.1.45" @@ -1123,18 +1099,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-rational" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" -dependencies = [ - "autocfg", - "num-bigint", - "num-integer", - "num-traits", -] - [[package]] name = "num-traits" version = "0.2.17" @@ -1194,7 +1158,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" dependencies = [ "lock_api", - "parking_lot_core 0.9.8", + "parking_lot_core 0.9.9", ] [[package]] @@ -1213,13 +1177,13 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.8" +version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" +checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.3.5", + "redox_syscall 0.4.1", "smallvec", "windows-targets", ] @@ -1413,6 +1377,15 @@ dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "redox_users" version = "0.4.3" @@ -1495,7 +1468,7 @@ version = "0.38.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "745ecfa778e66b2b63c88a61cb36e0eea109e803b0b86bf9879fbc77c70e86ed" dependencies = [ - "bitflags 2.4.0", + "bitflags 2.4.1", "errno", "libc", "linux-raw-sys", @@ -1758,7 +1731,7 @@ checksum = "864b869fdf56263f4c95c45483191ea0af340f9f3e3e7b4d57a61c7c87a970db" dependencies = [ "atoi", "base64", - "bitflags 2.4.0", + "bitflags 2.4.1", "byteorder", "bytes", "crc", @@ -1800,7 +1773,7 @@ checksum = "eb7ae0e6a97fb3ba33b23ac2671a5ce6e3cabe003f451abd5a56e7951d975624" dependencies = [ "atoi", "base64", - "bitflags 2.4.0", + "bitflags 2.4.1", "byteorder", "crc", "dotenvy", @@ -2317,3 +2290,31 @@ name = "zeroize" version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a0956f1ba7c7909bfb66c2e9e4124ab6f6482560f6628b5aaeba39207c9aad9" + +[[package]] +name = "zstd" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bffb3309596d527cfcba7dfc6ed6052f1d39dfbd7c867aa2e865e4a449c10110" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43747c7422e2924c11144d5229878b98180ef8b06cca4ab5af37afc8a8d8ea3e" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.9+zstd.1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/bfsp/Cargo.toml b/bfsp/Cargo.toml index 5eef7c6..877c693 100644 --- a/bfsp/Cargo.toml +++ b/bfsp/Cargo.toml @@ -13,7 +13,7 @@ tokio = { version = "1", features = ["fs", "io-util"] } sqlx = { version = "0.7", default-features = false } uuid = { version = "1", features = ["v4"] } chacha20poly1305 = { version = "0.10", features = ["std"] } -num = "0.4.1" +zstd = "0.13.0" [dev-dependencies] tokio = { version = "1", features = ["fs", "io-util", "macros", "rt-multi-thread"] } diff --git a/bfsp/src/crypto.rs b/bfsp/src/crypto.rs index 1105385..9af9a39 100644 --- a/bfsp/src/crypto.rs +++ b/bfsp/src/crypto.rs @@ -1,6 +1,5 @@ use anyhow::{anyhow, Result}; use chacha20poly1305::{aead::OsRng, AeadInPlace, Key, KeyInit, XChaCha20Poly1305}; -use num::PrimInt; use rkyv::{Archive, Deserialize, Serialize}; use sqlx::Sqlite; @@ -120,7 +119,3 @@ impl EncryptionNonce { pub async fn init_key() -> Result<()> { todo!() } - -pub fn size_to_encrypted_size(size: S) -> S { - size + (0..16).into_iter().map(|_| S::one()).sum() -} diff --git a/bfsp/src/lib.rs b/bfsp/src/lib.rs index bef170e..abfe8c9 100644 --- a/bfsp/src/lib.rs +++ b/bfsp/src/lib.rs @@ -442,7 +442,7 @@ impl FileHeader { } //TODO: can this be a slice? -pub async fn encrypted_chunk_from_file( +pub async fn compressed_encrypted_chunk_from_file( file_header: &FileHeader, file: &mut File, chunk_id: ChunkID, @@ -467,6 +467,12 @@ pub async fn encrypted_chunk_from_file( .read_to_end(&mut buf) .await?; + + println!("Size before compression: {}KB", buf.len()); + + let mut buf = zstd::bulk::compress(&buf, 15)?; + println!("Size after compression: {}KB", buf.len()); + file.rewind().await?; key.encrypt_chunk_in_place(&mut buf, &chunk_meta)?; diff --git a/cli/src/main.rs b/cli/src/main.rs index 56ddc2a..633e39d 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -9,7 +9,7 @@ use std::path::Path; use anyhow::Result; use bfsp::{ - encrypted_chunk_from_file, hash_chunk, hash_file, parallel_hash_chunk, size_to_encrypted_size, + compressed_encrypted_chunk_from_file, hash_chunk, hash_file, parallel_hash_chunk, use_parallel_hasher, Action, ChunkID, ChunkMetadata, ChunksUploaded, ChunksUploadedQuery, DownloadChunkReq, EncryptionKey, FileHash, FileHeader, }; @@ -363,8 +363,10 @@ pub async fn upload_file( sock.write_u16(action).await?; sock.write_all(chunk_meta.to_bytes()?.as_slice()).await?; - let chunk = encrypted_chunk_from_file(file_header, &mut file, chunk_meta.id, key).await?; - debug_assert_eq!(chunk.len() as u32, size_to_encrypted_size(chunk_meta.size)); + let chunk = + compressed_encrypted_chunk_from_file(file_header, &mut file, chunk_meta.id, key) + .await?; + sock.write_u32(chunk.len() as u32).await?; sock.write_all(chunk.as_slice()).await?; } @@ -429,7 +431,9 @@ pub async fn download_file + Display>( trace!("Reading chunk of size {}", chunk_metadata.size); - let mut chunk_buf = vec![0; size_to_encrypted_size(chunk_metadata.size) as usize]; + let chunk_size = sock.read_u32().await?; + + let mut chunk_buf = vec![0; chunk_size as usize]; sock.read_exact(&mut chunk_buf) .await .with_context(|| "Error reading raw chunk data")?; diff --git a/flake.lock b/flake.lock index 156103b..86e9786 100644 --- a/flake.lock +++ b/flake.lock @@ -5,11 +5,11 @@ "nixpkgs": "nixpkgs" }, "locked": { - "lastModified": 1690373729, - "narHash": "sha256-e136hTT7LqQ2QjOTZQMW+jnsevWwBpMj78u6FRUsH9I=", + "lastModified": 1694081375, + "narHash": "sha256-vzJXOUnmkMCm3xw8yfPP5m8kypQ3BhAIRe4RRCWpzy8=", "owner": "nmattia", "repo": "naersk", - "rev": "d9a33d69a9c421d64c8d925428864e93be895dcc", + "rev": "3f976d822b7b37fc6fb8e6f157c2dd05e7e94e89", "type": "github" }, "original": { @@ -21,11 +21,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1691709280, - "narHash": "sha256-zmfH2OlZEXwv572d0g8f6M5Ac6RiO8TxymOpY3uuqrM=", + "lastModified": 1697379843, + "narHash": "sha256-RcnGuJgC2K/UpTy+d32piEoBXq2M+nVFzM3ah/ZdJzg=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "cf73a86c35a84de0e2f3ba494327cf6fb51c0dfd", + "rev": "12bdeb01ff9e2d3917e6a44037ed7df6e6c3df9d", "type": "github" }, "original": { @@ -35,11 +35,11 @@ }, "nixpkgs_2": { "locked": { - "lastModified": 1691654369, - "narHash": "sha256-gSILTEx1jRaJjwZxRlnu3ZwMn1FVNk80qlwiCX8kmpo=", + "lastModified": 1697456312, + "narHash": "sha256-roiSnrqb5r+ehnKCauPLugoU8S36KgmWraHgRqVYndo=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "ce5e4a6ef2e59d89a971bc434ca8ca222b9c7f5e", + "rev": "ca012a02bf8327be9e488546faecae5e05d7d749", "type": "github" }, "original": { @@ -76,11 +76,11 @@ "systems": "systems" }, "locked": { - "lastModified": 1689068808, - "narHash": "sha256-6ixXo3wt24N/melDWjq70UuHQLxGV8jZvooRanIHXw0=", + "lastModified": 1694529238, + "narHash": "sha256-zsNZZGTGnMOf9YpHKJqMSsa0dXbfmxeoJ7xHlrt+xmY=", "owner": "numtide", "repo": "flake-utils", - "rev": "919d646de7be200f3bf08cb76ae1f09402b6f9b4", + "rev": "ff7b65b44d01cf9ba6a71320833626af21126384", "type": "github" }, "original": { diff --git a/src/main.rs b/src/main.rs index 3b93e1f..8cda41a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,13 +1,10 @@ // TODO: StorageBackendTrait mod auth; -use std::collections::HashMap; +use std::{collections::HashMap, os::unix::prelude::MetadataExt}; use anyhow::{anyhow, Result}; -use bfsp::{ - size_to_encrypted_size, Action, ChunkID, ChunkMetadata, ChunksUploaded, ChunksUploadedQuery, - DownloadChunkReq, -}; +use bfsp::{Action, ChunkID, ChunkMetadata, ChunksUploaded, ChunksUploadedQuery, DownloadChunkReq}; use dashmap::DashMap; use log::{debug, info, trace}; use once_cell::sync::Lazy; @@ -97,11 +94,14 @@ pub async fn handle_download_chunk(sock: &mut TcpStream) -> Result<()> { .read(true) .write(false) .append(false) - .open(path) + .open(&path) .await?; + let chunk_file_metadata = fs::metadata(path).await?; + trace!("Sending chunk"); + sock.write_u32(chunk_file_metadata.size() as u32).await?; tokio::io::copy(&mut chunk_file, sock).await?; Ok(()) @@ -159,7 +159,7 @@ async fn handle_upload_chunk(sock: &mut TcpStream) -> Result<()> { let chunk_id = &chunk_metadata.id; let mut chunk_file = fs::File::create(format!("./chunks/{}", chunk_id)).await?; - let expected_size = size_to_encrypted_size(chunk_metadata.size); + let expected_size = sock.read_u32().await?; let mut chunk_sock = sock.take(expected_size.into()); let bytes_copied = tokio::io::copy(&mut chunk_sock, &mut chunk_file).await;