From 028291e891fff7693f487e538a26ba0165959bd0 Mon Sep 17 00:00:00 2001 From: Tarek Elsayed <60650661+tareknaser@users.noreply.github.com> Date: Fri, 10 May 2024 19:29:57 +0300 Subject: [PATCH] `fs-storage`: format as valid JSON when write to disk (#41) Previously, the write to disk format for `FileStorage` was not a valid JSON. This commit fixes this. Other changes include: - The version is bumped to 3. - Field `data` of `FileStorageData` is renamed to `entries` - Support for reading version 2 plaintext `FileStorage` format for backwards compatibility is added. - This support is provided via a helper function `read_version_2_fs`. - A unit test for `read_version_2_fs` is also added. --------- Signed-off-by: Tarek --- fs-storage/src/file_storage.rs | 197 ++++++++++++++++++++------------- fs-storage/src/lib.rs | 1 + fs-storage/src/utils.rs | 82 ++++++++++++++ 3 files changed, 201 insertions(+), 79 deletions(-) create mode 100644 fs-storage/src/utils.rs diff --git a/fs-storage/src/file_storage.rs b/fs-storage/src/file_storage.rs index 23efeab5..d9096400 100644 --- a/fs-storage/src/file_storage.rs +++ b/fs-storage/src/file_storage.rs @@ -1,5 +1,6 @@ +use serde::{Deserialize, Serialize}; use std::fs::{self, File}; -use std::io::{BufRead, BufReader, BufWriter, Write}; +use std::io::{BufWriter, Write}; use std::time::SystemTime; use std::{ collections::BTreeMap, @@ -7,22 +8,56 @@ use std::{ }; use crate::base_storage::BaseStorage; +use crate::utils::read_version_2_fs; use data_error::{ArklibError, Result}; -const STORAGE_VERSION: i32 = 2; -const STORAGE_VERSION_PREFIX: &str = "version "; +/* +Note on `FileStorage` Versioning: -pub struct FileStorage { +`FileStorage` is a basic key-value storage system that persists data to disk. + +In version 2, `FileStorage` stored data in a plaintext format. +Starting from version 3, data is stored in JSON format. + +For backward compatibility, we provide a helper function `read_version_2_fs` to read version 2 format. +*/ +const STORAGE_VERSION: i32 = 3; + +/// Represents a file storage system that persists data to disk. +pub struct FileStorage +where + K: Ord, +{ label: String, path: PathBuf, timestamp: SystemTime, - data: BTreeMap, + data: FileStorageData, +} + +/// A struct that represents the data stored in a [`FileStorage`] instance. +/// +/// +/// This is the data that is serialized and deserialized to and from disk. +#[derive(Serialize, Deserialize)] +pub struct FileStorageData +where + K: Ord, +{ + version: i32, + entries: BTreeMap, } impl FileStorage where - K: Ord + Clone + serde::Serialize + serde::de::DeserializeOwned, - V: Clone + serde::Serialize + serde::de::DeserializeOwned, + K: Ord + + Clone + + serde::Serialize + + serde::de::DeserializeOwned + + std::str::FromStr, + V: Clone + + serde::Serialize + + serde::de::DeserializeOwned + + std::str::FromStr, { /// Create a new file storage with a diagnostic label and file path pub fn new(label: String, path: &Path) -> Self { @@ -30,63 +65,44 @@ where label, path: PathBuf::from(path), timestamp: SystemTime::now(), - data: BTreeMap::new(), + data: FileStorageData { + version: STORAGE_VERSION, + entries: BTreeMap::new(), + }, }; // Load the data from the file - file_storage.data = match file_storage.read_fs() { + file_storage.data.entries = match file_storage.read_fs() { Ok(data) => data, Err(_) => BTreeMap::new(), }; file_storage } - - /// Verify the version stored in the file header - fn verify_version(&self, header: &str) -> Result<()> { - if !header.starts_with(STORAGE_VERSION_PREFIX) { - return Err(ArklibError::Storage( - self.label.clone(), - "Unknown storage version prefix".to_owned(), - )); - } - - let version = header[STORAGE_VERSION_PREFIX.len()..] - .parse::() - .map_err(|_err| { - ArklibError::Storage( - self.label.clone(), - "Failed to parse storage version".to_owned(), - ) - })?; - - if version != STORAGE_VERSION { - return Err(ArklibError::Storage( - self.label.clone(), - format!( - "Storage version mismatch: expected {}, found {}", - STORAGE_VERSION, version - ), - )); - } - - Ok(()) - } } impl BaseStorage for FileStorage where - K: Ord + Clone + serde::Serialize + serde::de::DeserializeOwned, - V: Clone + serde::Serialize + serde::de::DeserializeOwned, + K: Ord + + Clone + + serde::Serialize + + serde::de::DeserializeOwned + + std::str::FromStr, + V: Clone + + serde::Serialize + + serde::de::DeserializeOwned + + std::str::FromStr, { - fn set(&mut self, id: K, value: V) { - self.data.insert(id, value); + /// Set a key-value pair in the storage + fn set(&mut self, key: K, value: V) { + self.data.entries.insert(key, value); self.timestamp = std::time::SystemTime::now(); self.write_fs() .expect("Failed to write data to disk"); } + /// Remove a key-value pair from the storage given a key fn remove(&mut self, id: &K) -> Result<()> { - self.data.remove(id).ok_or_else(|| { + self.data.entries.remove(id).ok_or_else(|| { ArklibError::Storage(self.label.clone(), "Key not found".to_owned()) })?; self.timestamp = std::time::SystemTime::now(); @@ -95,6 +111,8 @@ where Ok(()) } + /// Compare the timestamp of the storage file with the timestamp of the storage instance + /// to determine if the storage file has been updated. fn is_storage_updated(&self) -> Result { let file_timestamp = fs::metadata(&self.path)?.modified()?; let file_time_secs = file_timestamp @@ -109,35 +127,59 @@ where Ok(file_time_secs > self_time_secs) } + /// Read the data from the storage file fn read_fs(&mut self) -> Result> { - let file = fs::File::open(&self.path)?; - let reader = BufReader::new(file); - let mut lines = reader.lines(); + if !self.path.exists() { + return Err(ArklibError::Storage( + self.label.clone(), + "File does not exist".to_owned(), + )); + } - let new_timestamp = fs::metadata(&self.path)?.modified()?; - match lines.next() { - Some(header) => { - let header = header?; - self.verify_version(&header)?; - let mut data = String::new(); - for line in lines { - let line = line?; - if line.is_empty() { - continue; - } - data.push_str(&line); + // First check if the file starts with "version: 2" + let file_content = std::fs::read_to_string(&self.path)?; + if file_content.starts_with("version: 2") { + // Attempt to parse the file using the legacy version 2 storage format of FileStorage. + match read_version_2_fs(&self.path) { + Ok(data) => { + log::info!( + "Version 2 storage format detected for {}", + self.label + ); + self.timestamp = fs::metadata(&self.path)?.modified()?; + return Ok(data); + } + Err(_) => { + return Err(ArklibError::Storage( + self.label.clone(), + "Storage seems to be version 2, but failed to parse" + .to_owned(), + )); } - let data: BTreeMap = serde_json::from_str(&data)?; - self.timestamp = new_timestamp; - Ok(data) - } - None => Err(ArklibError::Storage( + }; + } + + let file = fs::File::open(&self.path)?; + let data: FileStorageData = serde_json::from_reader(file) + .map_err(|err| { + ArklibError::Storage(self.label.clone(), err.to_string()) + })?; + let version = data.version; + if version != STORAGE_VERSION { + return Err(ArklibError::Storage( self.label.clone(), - "Storage file is missing header".to_owned(), - )), + format!( + "Storage version mismatch: expected {}, got {}", + STORAGE_VERSION, version + ), + )); } + self.timestamp = fs::metadata(&self.path)?.modified()?; + + Ok(data.entries) } + /// Write the data to the storage file fn write_fs(&mut self) -> Result<()> { let parent_dir = self.path.parent().ok_or_else(|| { ArklibError::Storage( @@ -148,30 +190,24 @@ where fs::create_dir_all(parent_dir)?; let file = File::create(&self.path)?; let mut writer = BufWriter::new(file); - - writer.write_all( - format!("{}{}\n", STORAGE_VERSION_PREFIX, STORAGE_VERSION) - .as_bytes(), - )?; - - let value_map = self.data.clone(); - let value_data = serde_json::to_string(&value_map)?; + let value_data = serde_json::to_string_pretty(&self.data)?; writer.write_all(value_data.as_bytes())?; let new_timestamp = fs::metadata(&self.path)?.modified()?; if new_timestamp == self.timestamp { - return Err("Timestamp didn't update".into()); + return Err("Timestamp has not been updated".into()); } self.timestamp = new_timestamp; log::info!( "{} {} entries have been written", self.label, - value_map.len() + self.data.entries.len() ); Ok(()) } + /// Erase the storage file from disk fn erase(&self) -> Result<()> { fs::remove_file(&self.path).map_err(|err| { ArklibError::Storage(self.label.clone(), err.to_string()) @@ -179,9 +215,12 @@ where } } -impl AsRef> for FileStorage { +impl AsRef> for FileStorage +where + K: Ord, +{ fn as_ref(&self) -> &BTreeMap { - &self.data + &self.data.entries } } diff --git a/fs-storage/src/lib.rs b/fs-storage/src/lib.rs index 3b81ebfe..a6a6b03c 100644 --- a/fs-storage/src/lib.rs +++ b/fs-storage/src/lib.rs @@ -1,5 +1,6 @@ pub mod base_storage; pub mod file_storage; +mod utils; pub const ARK_FOLDER: &str = ".ark"; // Should not be lost if possible diff --git a/fs-storage/src/utils.rs b/fs-storage/src/utils.rs new file mode 100644 index 00000000..b5b6830a --- /dev/null +++ b/fs-storage/src/utils.rs @@ -0,0 +1,82 @@ +use data_error::Result; +use std::collections::BTreeMap; +use std::path::Path; + +/// Parses version 2 `FileStorage` format and returns the data as a BTreeMap +/// +/// Version 2 `FileStorage` format represents data as a BTreeMap in plaintext. +/// +/// For example: +/// ```text +/// version: 2 +/// key1:1 +/// key2:2 +/// key3:3 +/// ``` +pub fn read_version_2_fs(path: &Path) -> Result> +where + K: Ord + + Clone + + serde::Serialize + + serde::de::DeserializeOwned + + std::str::FromStr, + V: Clone + + serde::Serialize + + serde::de::DeserializeOwned + + std::str::FromStr, +{ + // First check if the file starts with "version: 2" + let file_content = std::fs::read_to_string(path)?; + if !file_content.starts_with("version: 2") { + return Err(data_error::ArklibError::Parse); + } + + // Parse the file content into a BTreeMap + let mut data = BTreeMap::new(); + for line in file_content.lines().skip(1) { + let mut parts = line.split(':'); + let key = parts + .next() + .unwrap() + .parse() + .map_err(|_| data_error::ArklibError::Parse)?; + let value = parts + .next() + .unwrap() + .parse() + .map_err(|_| data_error::ArklibError::Parse)?; + + data.insert(key, value); + } + + Ok(data) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + use tempdir::TempDir; + + /// Test reading a legacy version 2 `FileStorage` file + #[test] + fn test_read_legacy_fs() { + let temp_dir = TempDir::new("ark-rust").unwrap(); + let file_path = temp_dir.path().join("test_read_legacy_fs"); + let file_content = r#"version: 2 +key1:1 +key2:2 +key3:3 +"#; + let mut file = std::fs::File::create(&file_path).unwrap(); + file.write_all(file_content.as_bytes()).unwrap(); + + // Read the file and check the data + let data: BTreeMap = + read_version_2_fs(&file_path).unwrap(); + assert_eq!(data.len(), 3); + assert_eq!(data.get("key1"), Some(&1)); + assert_eq!(data.get("key2"), Some(&2)); + assert_eq!(data.get("key3"), Some(&3)); + } +}