Skip to content

Commit

Permalink
fs-storage: format as valid JSON when writing to disk (#41)
Browse files Browse the repository at this point in the history
Previously, the on-disk format written by `FileStorage` was not valid JSON. This commit fixes that.
Other changes include:
- The version is bumped to 3.
- Field `data` of `FileStorageData` is renamed to `entries`
- Support for reading the version 2 plaintext `FileStorage` format is added for backwards compatibility.
- This support is provided via a helper function `read_version_2_fs`.
- A unit test for `read_version_2_fs` is also added.

---------

Signed-off-by: Tarek <[email protected]>
  • Loading branch information
tareknaser authored May 10, 2024
1 parent 7b1428b commit 028291e
Show file tree
Hide file tree
Showing 3 changed files with 201 additions and 79 deletions.
197 changes: 118 additions & 79 deletions fs-storage/src/file_storage.rs
Original file line number Diff line number Diff line change
@@ -1,92 +1,108 @@
use serde::{Deserialize, Serialize};
use std::fs::{self, File};
use std::io::{BufRead, BufReader, BufWriter, Write};
use std::io::{BufWriter, Write};
use std::time::SystemTime;
use std::{
collections::BTreeMap,
path::{Path, PathBuf},
};

use crate::base_storage::BaseStorage;
use crate::utils::read_version_2_fs;
use data_error::{ArklibError, Result};

const STORAGE_VERSION: i32 = 2;
const STORAGE_VERSION_PREFIX: &str = "version ";
/*
Note on `FileStorage` Versioning:
pub struct FileStorage<K, V> {
`FileStorage` is a basic key-value storage system that persists data to disk.
In version 2, `FileStorage` stored data in a plaintext format.
Starting from version 3, data is stored in JSON format.
For backward compatibility, we provide a helper function `read_version_2_fs` to read version 2 format.
*/
const STORAGE_VERSION: i32 = 3;

/// Represents a file storage system that persists data to disk.
pub struct FileStorage<K, V>
where
K: Ord,
{
label: String,
path: PathBuf,
timestamp: SystemTime,
data: BTreeMap<K, V>,
data: FileStorageData<K, V>,
}

/// A struct that represents the data stored in a [`FileStorage`] instance.
///
/// This is the data that is serialized and deserialized to and from disk.
/// It pairs the storage format version with the key-value entries so both
/// are written and read as a single serde value.
#[derive(Serialize, Deserialize)]
pub struct FileStorageData<K, V>
where
K: Ord,
{
/// Storage format version; compared against `STORAGE_VERSION` when a file is read.
version: i32,
/// The key-value entries held by the storage, ordered by key.
entries: BTreeMap<K, V>,
}

impl<K, V> FileStorage<K, V>
where
K: Ord + Clone + serde::Serialize + serde::de::DeserializeOwned,
V: Clone + serde::Serialize + serde::de::DeserializeOwned,
K: Ord
+ Clone
+ serde::Serialize
+ serde::de::DeserializeOwned
+ std::str::FromStr,
V: Clone
+ serde::Serialize
+ serde::de::DeserializeOwned
+ std::str::FromStr,
{
/// Create a new file storage with a diagnostic label and file path
pub fn new(label: String, path: &Path) -> Self {
let mut file_storage = Self {
label,
path: PathBuf::from(path),
timestamp: SystemTime::now(),
data: BTreeMap::new(),
data: FileStorageData {
version: STORAGE_VERSION,
entries: BTreeMap::new(),
},
};

// Load the data from the file
file_storage.data = match file_storage.read_fs() {
file_storage.data.entries = match file_storage.read_fs() {
Ok(data) => data,
Err(_) => BTreeMap::new(),
};
file_storage
}

/// Verify the version stored in the file header
fn verify_version(&self, header: &str) -> Result<()> {
if !header.starts_with(STORAGE_VERSION_PREFIX) {
return Err(ArklibError::Storage(
self.label.clone(),
"Unknown storage version prefix".to_owned(),
));
}

let version = header[STORAGE_VERSION_PREFIX.len()..]
.parse::<i32>()
.map_err(|_err| {
ArklibError::Storage(
self.label.clone(),
"Failed to parse storage version".to_owned(),
)
})?;

if version != STORAGE_VERSION {
return Err(ArklibError::Storage(
self.label.clone(),
format!(
"Storage version mismatch: expected {}, found {}",
STORAGE_VERSION, version
),
));
}

Ok(())
}
}

impl<K, V> BaseStorage<K, V> for FileStorage<K, V>
where
K: Ord + Clone + serde::Serialize + serde::de::DeserializeOwned,
V: Clone + serde::Serialize + serde::de::DeserializeOwned,
K: Ord
+ Clone
+ serde::Serialize
+ serde::de::DeserializeOwned
+ std::str::FromStr,
V: Clone
+ serde::Serialize
+ serde::de::DeserializeOwned
+ std::str::FromStr,
{
fn set(&mut self, id: K, value: V) {
self.data.insert(id, value);
/// Set a key-value pair in the storage
fn set(&mut self, key: K, value: V) {
self.data.entries.insert(key, value);
self.timestamp = std::time::SystemTime::now();
self.write_fs()
.expect("Failed to write data to disk");
}

/// Remove a key-value pair from the storage given a key
fn remove(&mut self, id: &K) -> Result<()> {
self.data.remove(id).ok_or_else(|| {
self.data.entries.remove(id).ok_or_else(|| {
ArklibError::Storage(self.label.clone(), "Key not found".to_owned())
})?;
self.timestamp = std::time::SystemTime::now();
Expand All @@ -95,6 +111,8 @@ where
Ok(())
}

/// Compare the timestamp of the storage file with the timestamp of the storage instance
/// to determine if the storage file has been updated.
fn is_storage_updated(&self) -> Result<bool> {
let file_timestamp = fs::metadata(&self.path)?.modified()?;
let file_time_secs = file_timestamp
Expand All @@ -109,35 +127,59 @@ where
Ok(file_time_secs > self_time_secs)
}

/// Read the data from the storage file
fn read_fs(&mut self) -> Result<BTreeMap<K, V>> {
let file = fs::File::open(&self.path)?;
let reader = BufReader::new(file);
let mut lines = reader.lines();
if !self.path.exists() {
return Err(ArklibError::Storage(
self.label.clone(),
"File does not exist".to_owned(),
));
}

let new_timestamp = fs::metadata(&self.path)?.modified()?;
match lines.next() {
Some(header) => {
let header = header?;
self.verify_version(&header)?;
let mut data = String::new();
for line in lines {
let line = line?;
if line.is_empty() {
continue;
}
data.push_str(&line);
// First check if the file starts with "version: 2"
let file_content = std::fs::read_to_string(&self.path)?;
if file_content.starts_with("version: 2") {
// Attempt to parse the file using the legacy version 2 storage format of FileStorage.
match read_version_2_fs(&self.path) {
Ok(data) => {
log::info!(
"Version 2 storage format detected for {}",
self.label
);
self.timestamp = fs::metadata(&self.path)?.modified()?;
return Ok(data);
}
Err(_) => {
return Err(ArklibError::Storage(
self.label.clone(),
"Storage seems to be version 2, but failed to parse"
.to_owned(),
));
}
let data: BTreeMap<K, V> = serde_json::from_str(&data)?;
self.timestamp = new_timestamp;
Ok(data)
}
None => Err(ArklibError::Storage(
};
}

let file = fs::File::open(&self.path)?;
let data: FileStorageData<K, V> = serde_json::from_reader(file)
.map_err(|err| {
ArklibError::Storage(self.label.clone(), err.to_string())
})?;
let version = data.version;
if version != STORAGE_VERSION {
return Err(ArklibError::Storage(
self.label.clone(),
"Storage file is missing header".to_owned(),
)),
format!(
"Storage version mismatch: expected {}, got {}",
STORAGE_VERSION, version
),
));
}
self.timestamp = fs::metadata(&self.path)?.modified()?;

Ok(data.entries)
}

/// Write the data to the storage file
fn write_fs(&mut self) -> Result<()> {
let parent_dir = self.path.parent().ok_or_else(|| {
ArklibError::Storage(
Expand All @@ -148,40 +190,37 @@ where
fs::create_dir_all(parent_dir)?;
let file = File::create(&self.path)?;
let mut writer = BufWriter::new(file);

writer.write_all(
format!("{}{}\n", STORAGE_VERSION_PREFIX, STORAGE_VERSION)
.as_bytes(),
)?;

let value_map = self.data.clone();
let value_data = serde_json::to_string(&value_map)?;
let value_data = serde_json::to_string_pretty(&self.data)?;
writer.write_all(value_data.as_bytes())?;

let new_timestamp = fs::metadata(&self.path)?.modified()?;
if new_timestamp == self.timestamp {
return Err("Timestamp didn't update".into());
return Err("Timestamp has not been updated".into());
}
self.timestamp = new_timestamp;

log::info!(
"{} {} entries have been written",
self.label,
value_map.len()
self.data.entries.len()
);
Ok(())
}

/// Delete the backing storage file from disk.
///
/// Any I/O failure is reported as an `ArklibError::Storage` carrying this
/// storage's label and the textual form of the underlying error.
fn erase(&self) -> Result<()> {
    match fs::remove_file(&self.path) {
        Ok(()) => Ok(()),
        Err(io_err) => Err(ArklibError::Storage(
            self.label.clone(),
            io_err.to_string(),
        )),
    }
}
}

impl<K, V> AsRef<BTreeMap<K, V>> for FileStorage<K, V> {
impl<K, V> AsRef<BTreeMap<K, V>> for FileStorage<K, V>
where
K: Ord,
{
fn as_ref(&self) -> &BTreeMap<K, V> {
&self.data
&self.data.entries
}
}

Expand Down
1 change: 1 addition & 0 deletions fs-storage/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
pub mod base_storage;
pub mod file_storage;
mod utils;
pub const ARK_FOLDER: &str = ".ark";

// Should not be lost if possible
Expand Down
82 changes: 82 additions & 0 deletions fs-storage/src/utils.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
use data_error::Result;
use std::collections::BTreeMap;
use std::path::Path;

/// Parses version 2 `FileStorage` format and returns the data as a BTreeMap
///
/// Version 2 `FileStorage` format represents data as a BTreeMap in plaintext:
/// a `version: 2` header line followed by one `key:value` pair per line.
///
/// For example:
/// ```text
/// version: 2
/// key1:1
/// key2:2
/// key3:3
/// ```
///
/// Blank (or whitespace-only) lines after the header are ignored.
///
/// # Errors
///
/// Propagates the error from reading the file, and returns
/// `ArklibError::Parse` when:
/// - the content does not start with `version: 2`,
/// - a non-blank line has no `:` separator, or
/// - a key or value cannot be parsed through its `FromStr` implementation.
pub fn read_version_2_fs<K, V>(path: &Path) -> Result<BTreeMap<K, V>>
where
    K: Ord
        + Clone
        + serde::Serialize
        + serde::de::DeserializeOwned
        + std::str::FromStr,
    V: Clone
        + serde::Serialize
        + serde::de::DeserializeOwned
        + std::str::FromStr,
{
    // First check if the file starts with "version: 2"
    let file_content = std::fs::read_to_string(path)?;
    if !file_content.starts_with("version: 2") {
        return Err(data_error::ArklibError::Parse);
    }

    // Parse the file content into a BTreeMap, skipping the header line
    let mut data = BTreeMap::new();
    for line in file_content.lines().skip(1) {
        // Tolerate blank lines instead of treating them as malformed entries.
        if line.trim().is_empty() {
            continue;
        }

        // As before, only the first two `:`-separated fields are used.
        let mut parts = line.split(':');
        let (raw_key, raw_value) = match (parts.next(), parts.next()) {
            (Some(raw_key), Some(raw_value)) => (raw_key, raw_value),
            // A line without a separator is malformed input, not a bug:
            // report it as a parse error rather than panicking.
            _ => return Err(data_error::ArklibError::Parse),
        };

        let key = raw_key
            .parse()
            .map_err(|_| data_error::ArklibError::Parse)?;
        let value = raw_value
            .parse()
            .map_err(|_| data_error::ArklibError::Parse)?;

        data.insert(key, value);
    }

    Ok(data)
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempdir::TempDir;

    /// A legacy version 2 file must be parsed into exactly the key-value
    /// pairs listed after its header line.
    #[test]
    fn test_read_legacy_fs() {
        // Write a small version 2 fixture into a scratch directory.
        let scratch_dir = TempDir::new("ark-rust").unwrap();
        let legacy_path = scratch_dir.path().join("test_read_legacy_fs");
        let legacy_text = r#"version: 2
key1:1
key2:2
key3:3
"#;
        let mut legacy_file = std::fs::File::create(&legacy_path).unwrap();
        legacy_file.write_all(legacy_text.as_bytes()).unwrap();

        // Parse it back and compare against the expected entries.
        let parsed: BTreeMap<String, i32> =
            read_version_2_fs(&legacy_path).unwrap();
        assert_eq!(parsed.len(), 3);
        for (key, expected) in [("key1", 1), ("key2", 2), ("key3", 3)] {
            assert_eq!(parsed.get(key), Some(&expected));
        }
    }
}

0 comments on commit 028291e

Please sign in to comment.