Skip to content

Commit

Permalink
Merge branch 'main' into webgl
Browse files Browse the repository at this point in the history
  • Loading branch information
elftausend committed Sep 24, 2024
2 parents bc39a24 + 4dc54d1 commit 7c830ca
Show file tree
Hide file tree
Showing 33 changed files with 179 additions and 163 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ repository = "https://github.com/elftausend/custos"
keywords = ["gpu", "autodiff", "arrays", "deep-learning", "fixed-size"]
categories = ["science", "mathematics", "no-std", "external-ffi-bindings"]
readme = "README.md"
rust-version = "1.79"
rust-version = "1.81"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
Expand Down
2 changes: 1 addition & 1 deletion examples/lazy_and_fusing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ fn main() {
device.unary_fusing(&device, None).unwrap();

// this executes all operations inside the lazy graph
unsafe { device.run().unwrap() };
device.run().unwrap();

for (input, out) in buf.read().iter().zip(out2.replace().read()) {
assert!((out - (input + 1.).sin()).abs() < 0.01);
Expand Down
6 changes: 3 additions & 3 deletions examples/modules_usage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ fn main() {
let rhs = device.buffer([1, 2, 3, 4, 5]);

let out = device.add(&lhs, &rhs).unwrap();
unsafe { device.run().unwrap() }; // allocates memory and executes all operations inside the lazy graph
device.run().unwrap(); // allocates memory and executes all operations inside the lazy graph
assert_eq!(out.replace().read(), [2, 4, 6, 8, 10])
}

Expand All @@ -171,7 +171,7 @@ fn main() {
device.unary_fusing(&device, None).unwrap();

// this executes all operations inside the lazy graph
unsafe { device.run().unwrap() };
device.run().unwrap();

for (input, out) in buf.read().iter().zip(out2.replace().read()) {
assert!((out - (input + 1.).sin()).abs() < 0.01);
Expand All @@ -189,7 +189,7 @@ fn main() {
let rhs = device.buffer([1, 2, 3, 4, 5]);

let out = device.add(&lhs, &rhs).unwrap();
unsafe { device.run().unwrap() };
device.run().unwrap();
assert_eq!(out.replace().read(), vec![2, 4, 6, 8, 10])
}
}
16 changes: 7 additions & 9 deletions src/buffer.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use core::{
ffi::c_void,
mem::ManuallyDrop,
ops::{Deref, DerefMut},
};
Expand All @@ -11,9 +10,9 @@ use crate::cpu::{CPUPtr, CPU};
use crate::CPU;

use crate::{
flag::AllocFlag, shape::Shape, Alloc, Base, ClearBuf, CloneBuf, Device,
DevicelessAble, HasId, IsShapeIndep, OnDropBuffer, OnNewBuffer, PtrType, Read, ReplaceBuf,
ShallowCopy, Unit, WrappedData, WriteBuf, ZeroGrad,
flag::AllocFlag, shape::Shape, Alloc, Base, ClearBuf, CloneBuf, Device, DevicelessAble, HasId,
IsShapeIndep, OnDropBuffer, OnNewBuffer, PtrType, Read, ReplaceBuf, ShallowCopy, Unit,
WrappedData, WriteBuf, ZeroGrad,
};

pub use self::num::Num;
Expand Down Expand Up @@ -583,7 +582,7 @@ impl<'a, Mods: OnDropBuffer, T: Unit, S: Shape> Buffer<'a, T, CPU<Mods>, S> {
impl<'a, Mods: OnDropBuffer, T: Unit, S: Shape> Buffer<'a, T, crate::OpenCL<Mods>, S> {
/// Returns the OpenCL pointer of the `Buffer`.
#[inline]
pub fn cl_ptr(&self) -> *mut c_void {
pub fn cl_ptr(&self) -> *mut core::ffi::c_void {
assert!(
!self.base().ptr.is_null(),
"called cl_ptr() on an invalid OpenCL buffer"
Expand Down Expand Up @@ -687,15 +686,14 @@ where
T: Unit + Debug + Default + Clone + 'a,
D: Read<T, S> + Device + 'a,
for<'b> <D as Read<T, S>>::Read<'b>: Debug,
D::Data<T, S>: Debug,
D::Data<T, S>: Debug,
S: Shape,
{
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
f.debug_struct("Buffer")
.field("ptr", self.data());
f.debug_struct("Buffer").field("ptr", self.data());
writeln!(f, ",")?;

let data = self.read();
let data = self.read();
writeln!(f, "data: {data:?}")?;
writeln!(f, "len: {:?}", self.len())?;
write!(
Expand Down
40 changes: 28 additions & 12 deletions src/buffer/num.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
use core::{
convert::Infallible,
ffi::c_void,
ops::{Deref, DerefMut},
ptr::null_mut,
};

use crate::{
flag::AllocFlag, Alloc, Buffer, CloneBuf, Device, HasId, OnDropBuffer, PtrType,
ShallowCopy, Unit, WrappedData,
flag::AllocFlag, Alloc, Buffer, CloneBuf, Device, HasId, OnDropBuffer, PtrType, ShallowCopy,
Unit, WrappedData,
};

#[derive(Debug, Default)]
Expand Down Expand Up @@ -175,15 +173,37 @@ impl<'a, T: Unit> Buffer<'a, T, ()> {
///
/// let x: Buffer<f32, _> = 7f32.into();
/// assert_eq!(**x, 7.);
/// assert_eq!(x.item(), 7.);
/// assert_eq!(x.item(), &7.);
///
/// ```
#[inline]
pub fn item(&self) -> T
pub fn item(&self) -> &T
where
T: Unit + Copy,
T: Unit,
{
self.data.num
&self.data.num
}

/// Used if the `Buffer` contains only a single value.
/// By derefencing this `Buffer`, you obtain this value as well (which is probably preferred).
///
/// # Example
///
/// ```
/// use custos::Buffer;
///
/// let mut x: Buffer<f32, _> = 7f32.into();
/// assert_eq!(**x, 7.);
/// *x.item_mut() += 1.;
/// assert_eq!(*x.item_mut(), 8.);
///
/// ```
#[inline]
pub fn item_mut(&mut self) -> &mut T
where
T: Unit,
{
&mut self.data.num
}
}

Expand Down Expand Up @@ -229,8 +249,4 @@ mod tests {

<()>::new().unwrap();
}

#[cfg(feature = "lazy")]
#[test]
fn test_num_device_lazy() {}
}
38 changes: 38 additions & 0 deletions src/cache/owned_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,4 +129,42 @@ mod tests {
}
assert_eq!(cache.nodes.len(), 1);
}

#[cfg(feature = "cpu")]
#[test]
#[should_panic]
fn test_cache_with_diffrent_length_return() {
    use crate::{Base, Buffer, Cursor, Retriever};

    let device = CPU::<Cached<Base>>::new();

    for step in device.range(10) {
        // Every iteration except the fifth requests a buffer of length 3.
        if step != 4 {
            let _short: Buffer<u8, _> = device.retrieve(3, ()).unwrap();
            continue;
        }

        // `retrieve` has an assert inside, therefore this line leads to a crash
        // due to mismatching lengths (5 requested here, 3 requested before).
        let buf: Buffer<u8, _> = device.retrieve(5, ()).unwrap();
        assert_eq!(buf.len, 5);
    }
}

#[cfg(feature = "cpu")]
#[test]
fn test_cache_with_cursor_range_overlap() {
    use crate::{Base, Buffer, Cursor, Retriever};

    let device = CPU::<Cached<Base>>::new();

    // First pass: ten iterations, each retrieving a buffer of length 3.
    for _ in device.range(10) {
        let _first: Buffer<u8, _> = device.retrieve(3, ()).unwrap();
    }
    // The test expects the cursor to sit at 1 after the first ranged loop.
    assert_eq!(device.cursor(), 1);

    // Second pass: an explicit range starting at 1, retrieving buffers of length 4.
    for _ in device.range(1..7) {
        let _second: Buffer<u8, _> = device.retrieve(4, ()).unwrap();
    }
    // The test expects the cursor to sit at 2 after the second ranged loop.
    assert_eq!(device.cursor(), 2);
}
}
2 changes: 1 addition & 1 deletion src/devices/cpu/cpu_device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ impl<Mods> crate::LazyRun for CPU<Mods> {}

impl<Mods: crate::RunModule<Self>> crate::Run for CPU<Mods> {
#[inline]
unsafe fn run(&self) -> crate::Result<()> {
fn run(&self) -> crate::Result<()> {
self.modules.run(self)
}
}
Expand Down
24 changes: 11 additions & 13 deletions src/devices/cpu/cpu_ptr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,7 @@ impl<T> CPUPtr<T> {
/// ```
#[inline]
pub unsafe fn from_ptr(ptr: *mut T, len: usize, flag: AllocFlag) -> CPUPtr<T> {
CPUPtr {
ptr,
len,
flag,
}
CPUPtr { ptr, len, flag }
}
pub fn from_vec(mut vec: Vec<T>) -> CPUPtr<T> {
// CPUPtr only knows about the length, not the capacity -> deallocation happens with length, which may be less than the capacity
Expand Down Expand Up @@ -246,7 +242,7 @@ impl<T> ShallowCopy for CPUPtr<T> {

pub struct DeallocWithLayout {
ptr: core::mem::ManuallyDrop<CPUPtr<u8>>,
layout: Layout,
layout: Layout,
}

impl DeallocWithLayout {
Expand All @@ -255,14 +251,18 @@ impl DeallocWithLayout {
let (_, layout) = ptr.current_memory()?;
let ptr = core::mem::ManuallyDrop::new(ptr);
Some(Self {
ptr: core::mem::ManuallyDrop::new(CPUPtr { ptr: ptr.ptr as *mut u8, len: ptr.len, flag: ptr.flag }),
layout
ptr: core::mem::ManuallyDrop::new(CPUPtr {
ptr: ptr.ptr as *mut u8,
len: ptr.len,
flag: ptr.flag,
}),
layout,
})
}

#[inline]
#[inline]
pub fn layout(&self) -> &Layout {
&self.layout
&self.layout
}
}

Expand Down Expand Up @@ -452,9 +452,7 @@ mod tests {
#[test]
fn test_dealloc_with_layout() {
let data = CPUPtr::<f32>::new_initialized(10, crate::flag::AllocFlag::None);
let dealloc = unsafe {
DeallocWithLayout::new(data).unwrap()
};
let dealloc = unsafe { DeallocWithLayout::new(data).unwrap() };
assert_eq!(dealloc.layout().size(), 40)
}
}
6 changes: 2 additions & 4 deletions src/devices/cuda/cuda_ptr.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
use core::{marker::PhantomData, ptr::null_mut};

use crate::{flag::AllocFlag, HasId, Id, PtrType, ShallowCopy};

use super::api::{cu_read, cufree, cumalloc, CudaResult};
use crate::{flag::AllocFlag, HasId, Id, PtrType, ShallowCopy};
use core::marker::PhantomData;

/// The pointer used for `CUDA` [`Buffer`](crate::Buffer)s
#[derive(Debug, PartialEq, Eq)]
Expand Down
12 changes: 6 additions & 6 deletions src/devices/cuda/lazy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ impl<Mods> crate::LazyRun for CUDA<Mods> {

impl<Mods: crate::RunModule<Self>> crate::Run for CUDA<Mods> {
#[inline]
unsafe fn run(&self) -> crate::Result<()> {
fn run(&self) -> crate::Result<()> {
self.modules.run(self)
}
}
Expand All @@ -74,7 +74,7 @@ impl<Mods> crate::LazySetup for CUDA<Mods> {
#[cfg(test)]
mod tests {
use crate::{
AddOperation, ApplyFunction, AsNoId, Base, Buffer, Combiner, Device, HasId, Lazy, Retrieve,
AddOperation, ApplyFunction, Base, Buffer, Combiner, Device, HasId, Lazy, Retrieve,
Retriever, Run, CUDA,
};

Expand Down Expand Up @@ -133,7 +133,7 @@ mod tests {
assert_eq!(lhs.read(), vec![1, 2, 3, 4, 5, 6]);
assert_eq!(rhs.read(), vec![1, 2, 3, 4, 5, 6]);

unsafe { device.run().unwrap() };
device.run().unwrap();

assert_eq!(out.read(), vec![2, 4, 6, 8, 10, 12]);
assert_eq!(rhs.read(), vec![3, 6, 9, 12, 15, 18]);
Expand Down Expand Up @@ -187,7 +187,7 @@ mod tests {
lhs.write(&[1, 2, 3, 4, 5, 6]);
rhs.write(&[1, 2, 3, 4, 5, 6]);
device.mem_transfer_stream.sync().unwrap();
unsafe { device.run().unwrap() };
device.run().unwrap();
}

assert_eq!(out.read(), vec![2, 4, 6, 8, 10, 12]);
Expand Down Expand Up @@ -294,7 +294,7 @@ mod tests {
let out = cuda_ew(&device, &lhs, &rhs, ew_src("add", '+'), "add");
let out2 = cuda_ew(&device, &out, &rhs, ew_src("add", '+'), "add");

let _ = unsafe { device.run() };
let _ = device.run();

assert_eq!(out.replace().read(), [2, 4, 6, 8, 10, 12]);
assert_eq!(out2.replace().read(), [3, 6, 9, 12, 15, 18]);
Expand All @@ -310,6 +310,6 @@ mod tests {
let out = device.apply_fn(&out, |x| x.cos());
let _out = device.apply_fn(&out, |x| x.ln());

let _ = unsafe { device.run() };
let _ = device.run();
}
}
8 changes: 4 additions & 4 deletions src/devices/cuda/ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ use crate::{
bounds_to_range,
cuda::api::{cu_read_async, CUstreamCaptureStatus},
op_hint::unary,
pass_down_add_operation, pass_down_exec_now, AddOperation, ApplyFunction,
Buffer, CDatatype, ClearBuf, CopySlice, OnDropBuffer, Read, Resolve, Retrieve, Retriever,
SetOpHint, Shape, ToCLSource, ToMarker, UnaryGrad, Unit, WriteBuf, ZeroGrad, CUDA,
pass_down_add_operation, pass_down_exec_now, AddOperation, ApplyFunction, Buffer, CDatatype,
ClearBuf, CopySlice, OnDropBuffer, Read, Resolve, Retrieve, Retriever, SetOpHint, Shape,
ToCLSource, ToMarker, UnaryGrad, Unit, WriteBuf, ZeroGrad, CUDA,
};

use super::{
Expand Down Expand Up @@ -291,7 +291,7 @@ mod tests {

assert_eq!(lhs_grad.read(), vec![1, 2, 3, 4, 5, 6]);

unsafe { device.run().unwrap() }
device.run().unwrap();

assert_eq!(lhs_grad.read(), vec![4, 6, 8, 10, 12, 14]);
}
Expand Down
11 changes: 4 additions & 7 deletions src/devices/nnapi/nnapi_device.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::{
cpu::{CPUPtr, DeallocWithLayout}, Alloc, AsOperandCode, Base, Buffer, Device, HasId, IsShapeIndep, Lazy, LazyRun,
LazySetup, Module, OnDropBuffer, PtrType, Retrieve, Retriever, Setup, Shape, Unit, WrappedData,
cpu::{CPUPtr, DeallocWithLayout},
Alloc, AsOperandCode, Base, Buffer, Device, HasId, IsShapeIndep, Lazy, LazyRun, LazySetup,
Module, OnDropBuffer, PtrType, Retrieve, Retriever, Setup, Shape, Unit, WrappedData,
};

use super::NnapiPtr;
Expand Down Expand Up @@ -215,11 +216,7 @@ impl<T, Mods: OnDropBuffer> NnapiDevice<T, Mods> {
fn set_input_ptrs(&self, run: &mut Execution) -> crate::Result<()> {
for (idx, (_id, input_ptr)) in self.input_ptrs.borrow().iter().enumerate() {
unsafe {
run.set_input_raw(
idx as i32,
input_ptr.ptr.cast(),
input_ptr.layout().size()
)
run.set_input_raw(idx as i32, input_ptr.ptr.cast(), input_ptr.layout().size())
}?
}
Ok(())
Expand Down
2 changes: 1 addition & 1 deletion src/devices/opencl/cl_device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ pass_down_use_gpu_or_cpu!(OpenCL);

impl<Mods: crate::RunModule<Self>> crate::Run for OpenCL<Mods> {
#[inline]
unsafe fn run(&self) -> crate::Result<()> {
fn run(&self) -> crate::Result<()> {
self.modules.run(self)
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/devices/opencl/ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ mod test {
let out = Buffer::from((&device, [1, 1, 1, 1, 1, 1]));

device.add_unary_grad(&lhs, &mut lhs_grad, &out, |x| x.mul(2).add(1));
unsafe { device.run().unwrap() };
device.run().unwrap();

assert_eq!(lhs_grad.read(), [4, 7, 10, 13, 16, 19]);
}
Expand Down
Loading

0 comments on commit 7c830ca

Please sign in to comment.