Skip to content

Commit

Permalink
Add ByteString implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
bluk committed Dec 25, 2023
1 parent c381f17 commit 5fac313
Show file tree
Hide file tree
Showing 12 changed files with 336 additions and 121 deletions.
11 changes: 10 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
# CHANGELOG

## [Unreleased]

- Add `ByteString` type as a specialized type for serialization and
deserialization of bencoded strings.
- **Breaking change**: Modify `Value` variants to use `ByteString` instead of
`serde_bytes::ByteBuf` for byte string values. Removed the `serde_bytes`
dependency. While it is still possible to use `serde_bytes` in an
application's code, `serde_bytes` has not reached `1.0`, so `ByteString` was
added and is used in the public API instead.

## [0.7.0] - 2022-07-31

### Added
Expand Down Expand Up @@ -103,7 +113,6 @@

* `Serializer`, `Deserializer`, and related functions.


[Unreleased]: https://github.com/bluk/bt_bencode/compare/v0.7.0...HEAD
[0.7.0]: https://github.com/bluk/bt_bencode/compare/v0.6.1...v0.7.0
[0.6.1]: https://github.com/bluk/bt_bencode/compare/v0.6.0...v0.6.1
Expand Down
5 changes: 2 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ rust-version = "1.36.0"

[dependencies]
serde = {version = "1", default-features = false }
serde_bytes = { version = "0.11", default-features = false }
itoa = {version = "1", default-features = false }

[dev-dependencies]
Expand All @@ -31,9 +30,9 @@ sha1 = "0.10.1"
[features]
default = ["std"]

std = ["serde/std", "serde_bytes/std"]
std = ["serde/std"]

alloc = ["serde/alloc", "serde_bytes/alloc"]
alloc = ["serde/alloc"]

[package.metadata.docs.rs]
all-features = true
Expand Down
202 changes: 202 additions & 0 deletions src/bstring.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
//! Byte string which helps with the deserialization.

use core::{
borrow::{Borrow, BorrowMut},
cmp, fmt,
ops::{Deref, DerefMut},
};

#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::{string::String, vec::Vec};
#[cfg(feature = "std")]
use std::{string::String, vec::Vec};

use serde::{
de::{SeqAccess, Visitor},
Deserialize, Deserializer,
};

/// A sequence of bytes like a `Vec<u8>`.
///
/// Bencoded "strings" are not necessarily UTF-8 encoded values so if a field is
/// not guaranteed to be a UTF-8 string, then you should use a `ByteString` or
/// another equivalent type.
///
/// Ideally, if you knew a field was a bencoded "string", then you could use
/// `Vec<u8>` or `&[u8]` to represent the field without having to use a wrapper
/// like `ByteString` (which is just a newtype around `Vec<u8>`). However, due
/// to a limitation within `serde` and Rust, a `Vec<u8>` and `&[u8]` will
/// serialize and deserialize as a list of individual byte elements.
///
/// The `serde_bytes` crate can overcome this limitation. `serde_bytes` is still
/// pre-1.0 at the time of this writing, so a specific type within this crate
/// exists.
///
/// The [`Default`] value is an empty byte string.
///
/// # Examples
///
/// ```rust
/// use bt_bencode::ByteString;
///
/// let bstr = ByteString::from("hello");
/// assert_eq!(bstr.as_slice(), b"hello");
/// assert_eq!(&*bstr, b"hello");
/// assert_eq!(bstr, ByteString::from(String::from("hello")));
///
/// let expected: Vec<u8> = b"hello".to_vec();
/// assert_eq!(*bstr, expected);
/// assert_eq!(bstr, expected.into());
///
/// assert!(ByteString::default().is_empty());
///
/// let encoded = bt_bencode::to_vec(&bstr)?;
/// assert_eq!(encoded, b"5:hello");
///
/// let decoded: ByteString = bt_bencode::from_slice(&encoded)?;
/// assert_eq!(decoded.as_slice(), b"hello");
///
/// # Ok::<(), bt_bencode::Error>(())
/// ```
#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ByteString(Vec<u8>);

/// Cheap view of the stored bytes as a shared slice.
impl AsRef<[u8]> for ByteString {
    fn as_ref(&self) -> &[u8] {
        self.0.as_slice()
    }
}

/// Cheap view of the stored bytes as a mutable slice.
impl AsMut<[u8]> for ByteString {
    fn as_mut(&mut self) -> &mut [u8] {
        self.0.as_mut_slice()
    }
}

/// Borrows the stored bytes as a shared slice.
impl Borrow<[u8]> for ByteString {
    fn borrow(&self) -> &[u8] {
        self.0.as_slice()
    }
}

/// Borrows the stored bytes as a mutable slice.
impl BorrowMut<[u8]> for ByteString {
    fn borrow_mut(&mut self) -> &mut [u8] {
        self.0.as_mut_slice()
    }
}

/// Formats the byte string as a list of its individual bytes, the same way
/// the wrapped `Vec<u8>` is formatted.
impl fmt::Debug for ByteString {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_list().entries(self.0.iter()).finish()
    }
}

impl Deref for ByteString {
type Target = Vec<u8>;

fn deref(&self) -> &Self::Target {
&self.0
}
}

impl DerefMut for ByteString {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}

impl<'a> From<&'a [u8]> for ByteString {
fn from(value: &'a [u8]) -> Self {
Self(Vec::from(value))
}
}

impl<'a> From<&'a str> for ByteString {
fn from(value: &'a str) -> Self {
Self(Vec::from(value))
}
}

impl From<String> for ByteString {
fn from(value: String) -> Self {
Self(Vec::from(value))
}
}

impl From<Vec<u8>> for ByteString {
fn from(value: Vec<u8>) -> Self {
Self(value)
}
}

/// Serializes the contents through `serialize_bytes`, i.e. as one byte string
/// rather than as a sequence of individual elements.
impl serde::Serialize for ByteString {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let bytes: &[u8] = &self.0[..];
        serializer.serialize_bytes(bytes)
    }
}

struct BStringVisitor;

impl<'de> Visitor<'de> for BStringVisitor {
type Value = ByteString;

fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
formatter.write_str("byte string")
}

fn visit_seq<V>(self, mut visitor: V) -> Result<Self::Value, V::Error>
where
V: SeqAccess<'de>,
{
let capacity = cmp::min(visitor.size_hint().unwrap_or_default(), 4096);
let mut bytes = Vec::with_capacity(capacity);

while let Some(b) = visitor.next_element()? {
bytes.push(b);
}

Ok(ByteString::from(bytes))
}

fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E> {
Ok(ByteString::from(v))
}

fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(ByteString::from(v))
}

fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(ByteString::from(v))
}

fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(ByteString::from(v))
}
}

/// Deserializes by requesting a byte buffer from the deserializer and handing
/// whatever it produces to `BStringVisitor`.
impl<'de> Deserialize<'de> for ByteString {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let visitor = BStringVisitor;
        deserializer.deserialize_byte_buf(visitor)
    }
}

impl ByteString {
/// Returns the inner vector.
#[inline]
#[must_use]
pub fn into_vec(self) -> Vec<u8> {
self.0
}
}
19 changes: 10 additions & 9 deletions src/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -609,8 +609,9 @@ where

#[cfg(test)]
mod tests {
use crate::ByteString;

use super::*;
use serde_bytes::ByteBuf;
use serde_derive::Deserialize;

#[cfg(all(feature = "alloc", not(feature = "std")))]
Expand Down Expand Up @@ -834,23 +835,23 @@ mod tests {
#[test]
fn test_deserialize_integer_as_raw_bytes() -> Result<()> {
#[derive(Debug, PartialEq, Deserialize)]
struct S(ByteBuf);
struct S(ByteString);

let input = "i-1234e";
let s: S = from_slice(input.as_bytes())?;
let expected = S(ByteBuf::from(input.as_bytes().to_vec()));
let expected = S(ByteString::from(input.as_bytes().to_vec()));
assert_eq!(s, expected);
Ok(())
}

#[test]
fn test_deserialize_list_as_raw_bytes() -> Result<()> {
#[derive(Debug, PartialEq, Deserialize)]
struct S(ByteBuf);
struct S(ByteString);

let input = "l4:spam4:eggse";
let s: S = from_slice(input.as_bytes())?;
let expected = S(ByteBuf::from(input.as_bytes().to_vec()));
let expected = S(ByteString::from(input.as_bytes().to_vec()));
assert_eq!(s, expected);
Ok(())
}
Expand All @@ -859,13 +860,13 @@ mod tests {
fn test_deserialize_map_value_as_raw_bytes() -> Result<()> {
#[derive(Debug, PartialEq, Deserialize)]
struct S {
spam: ByteBuf,
spam: ByteString,
}

let input = "d4:spamd1:a1:bee";
let s: S = from_slice(input.as_bytes())?;
let expected = S {
spam: ByteBuf::from(b"d1:a1:be".to_vec()),
spam: ByteString::from(b"d1:a1:be".to_vec()),
};
assert_eq!(s, expected);
Ok(())
Expand All @@ -874,11 +875,11 @@ mod tests {
#[test]
fn test_deserialize_map_as_raw_bytes() -> Result<()> {
#[derive(Debug, PartialEq, Deserialize)]
struct S(ByteBuf);
struct S(ByteString);

let input = "d4:spamd1:a1:bee";
let s: S = from_slice(input.as_bytes())?;
let expected = S(ByteBuf::from(input.as_bytes().to_vec()));
let expected = S(ByteString::from(input.as_bytes().to_vec()));
assert_eq!(s, expected);
Ok(())
}
Expand Down
3 changes: 3 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ extern crate alloc;
#[macro_use]
extern crate serde;

mod bstring;
mod de;
mod error;

Expand All @@ -114,6 +115,8 @@ pub mod write;
mod ser;
pub mod value;

#[doc(inline)]
pub use bstring::ByteString;
#[doc(inline)]
pub use de::{from_slice, Deserializer};
#[doc(inline)]
Expand Down
2 changes: 1 addition & 1 deletion src/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ pub trait Read<'a> {

/// Returns the next byte but does not consume.
///
/// Repeated peeks (with no [next()][Read::next] call) should return the same byte.
/// Repeated peeks (with no [`next()`][Read::next] call) should return the same byte.
fn peek(&mut self) -> Option<Result<u8>>;

/// Returns the position in the stream of bytes.
Expand Down
5 changes: 3 additions & 2 deletions src/ser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -636,8 +636,9 @@ impl<'a> ser::Serializer for &'a mut MapKeySerializer {

#[cfg(test)]
mod tests {
use crate::ByteString;

use super::*;
use serde_bytes::ByteBuf;

#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::{format, string::String, vec};
Expand Down Expand Up @@ -772,7 +773,7 @@ mod tests {

#[test]
fn test_serialize_bytes() {
let value = ByteBuf::from(String::from("123").into_bytes());
let value = ByteString::from(String::from("123").into_bytes());
assert_eq!(to_vec(&&value).unwrap(), String::from("3:123").into_bytes());
}

Expand Down
Loading

0 comments on commit 5fac313

Please sign in to comment.