Jlw422 cat #170

Open: wants to merge 6 commits into master
18 changes: 14 additions & 4 deletions aten/src/ATen/native/hammerblade/Cat.cpp
@@ -6,7 +6,7 @@ namespace at { namespace native {

Tensor _cat_hb(TensorList tensors, int64_t dim) {
TORCH_CHECK(tensors.size() > 0, "_cat_hb: cannot concatenate empty tensor list");
TORCH_CHECK(dim == 0, "this simple cat only takes dim=0");
TORCH_CHECK(dim == 0 || dim == 1, "this simple cat only takes dim=0 or dim=1");
TORCH_CHECK(tensors[0].dim() <= 3, "this simple cat only takes up to 3-dimension tensors");
// convert TensorList length to uint32
uint32_t length_u32 = safe_downcast<uint32_t, size_t>(tensors.size());
@@ -25,18 +25,28 @@ Tensor _cat_hb(TensorList tensors, int64_t dim) {
uint32_t space = 0;
for (size_t i = 0; i < length_u32; i++) {
TORCH_CHECK(tensors[i].dim() == ndim, "tensors have different dimensions");
space += tensors[i].size(0);
space += tensors[i].size(dim);
}

Tensor result;
if (ndim == 1) {
result = at::empty({space}, tensors[0].options());
}
else if (ndim == 2) {
result = at::empty({space, tensors[0].size(1)}, tensors[0].options());
    if (dim == 1) {
      result = at::empty({tensors[0].size(0), space}, tensors[0].options());
    }
    else {
      result = at::empty({space, tensors[0].size(1)}, tensors[0].options());
    }
}
else if (ndim == 3) {
result = at::empty({space, tensors[0].size(1), tensors[0].size(2)}, tensors[0].options());
    if (dim == 1) {
      result = at::empty({tensors[0].size(0), space, tensors[0].size(2)}, tensors[0].options());
    }
    else {
      result = at::empty({space, tensors[0].size(1), tensors[0].size(2)}, tensors[0].options());
    }
}

tensor_args.push_back(result);
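For reference, the allocation above encodes the cat shape rule: the size along the concat dimension is summed across inputs (`space`), and every other dimension is copied from `tensors[0]`. A minimal sketch of the expected shapes in plain PyTorch (hypothetical sizes, CPU tensors):

```python
import torch

x = torch.randn(4, 2)
y = torch.randn(4, 3)

# dim=0: sizes along dim 0 add up; remaining dims come from the first input
print(torch.cat([x, x], 0).shape)  # torch.Size([8, 2])

# dim=1: sizes along dim 1 add up; dim 0 must match across inputs
print(torch.cat([x, y], 1).shape)  # torch.Size([4, 5])
```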
17 changes: 17 additions & 0 deletions aten/src/ATen/native/hammerblade/TanhBackward.cpp
@@ -0,0 +1,17 @@
#include <ATen/Dispatch.h>
#include <ATen/hammerblade/HammerBladeContext.h>
#include <ATen/native/TensorIterator.h>
#include <ATen/native/UnaryOps.h>
#include <ATen/native/hammerblade/Offload.h>

namespace at { namespace native {

Tensor tanh_backward_hb(const Tensor & grad_output, const Tensor & output) {
  // AT_DISPATCH_FLOAT_TYPE_ONLY(grad_output.dtype(), output.dtype(), "tanh_backward_hb", [&]() {
  hb_offload_kernel(grad_output, output, "tensorlib_tanh_backward");
  // });
  // the kernel writes its result into the first tensor it receives, so
  // grad_output holds the gradient after the offload; return it to the caller
  return grad_output;
}

//REGISTER_HAMMERBLADE_DISPATCH(tanh_backward_stub, &tanh_backward_kernel_hb);
//
}} // namespace at::native
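For y = tanh(x), dy/dx = 1 - y^2. ATen hands tanh_backward the saved forward output (already tanh(x)) together with grad_output, so the full gradient is grad_output * (1 - output^2). A minimal CPU sketch of that identity, independent of the HammerBlade path:

```python
import torch

x = torch.randn(16)
output = torch.tanh(x)        # what ATen passes in as `output`
grad_output = torch.randn(16)

# full tanh backward: grad_output * (1 - tanh(x)^2)
grad_input = grad_output * (1 - output.pow(2))
```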
1 change: 1 addition & 0 deletions aten/src/ATen/native/native_functions.yaml
@@ -6366,6 +6366,7 @@
dispatch:
CPU: legacy::cpu::_thnn_tanh_backward
CUDA: legacy::cuda::_thnn_tanh_backward
HammerBlade: tanh_backward_hb

# What's a thnn_conv_ versus a slow_conv_?
#
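The one-line HammerBlade dispatch entry added above is what routes autograd's tanh backward to tanh_backward_hb. A sketch of exercising it end to end, assuming a HammerBlade-enabled build:

```python
import torch

x = torch.randn(8, requires_grad=True)
y = torch.tanh(x.hammerblade())
# backward through tanh on a HammerBlade tensor dispatches to tanh_backward_hb
y.backward(torch.ones_like(y))
print(x.grad)  # should equal 1 - torch.tanh(x) ** 2
```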
42 changes: 33 additions & 9 deletions hammerblade/torch/kernel/kernel_cat.cpp
@@ -3,7 +3,7 @@
//====================================================================
// simple _cat kernel that works along dim 0 and dim 1
//
// Authors : Lin Cheng, Janice Wei
// Authors : Lin Cheng, Jack Weber
// Date : 07/29/2020, 08/04/2020

#define BUF_SIZE 16
@@ -14,7 +14,7 @@ extern "C" {
//====================================================================
// tensorlib__cat
//====================================================================
// This is a simple _cat kernel only works with 0 dim
// This is a simple _cat kernel that only works along dim 0 and dim 1

__attribute__ ((noinline))
int tensorlib__cat( hb_tensor_t** tensors_p, hb_tensor_t* result_p,
@@ -23,30 +23,54 @@ int tensorlib__cat( hb_tensor_t** tensors_p, hb_tensor_t* result_p,
HBTensor<float> result(result_p);
uint32_t length = *length_p;
hb_assert(length <= BUF_SIZE);
int32_t dim = *dim_p;
uint32_t dim = *dim_p;
int32_t arr[BUF_SIZE];
int32_t dim0[BUF_SIZE];
int32_t dim1[BUF_SIZE];

// collect tensors' size
for(size_t i = 0; i < length; i++) {
HBTensor<float> tensor(tensors_p[i]);
arr[i] = tensor.numel();
dim0[i] = tensor.dim(0);
int32_t n = tensor.ndim();
    if (n > 1) {
      dim1[i] = tensor.dim(1);
    }
}
bsg_cuda_print_stat_kernel_start();
bsg_saif_start();


bsg_cuda_print_stat_kernel_start();
hb_tiled_for(result.numel(), [&] (int32_t i) {
int32_t j = 0;
int32_t index = 0;
int32_t size = arr[0];
int32_t size = arr[0];
int32_t q = 0;

    if (dim == 1) {
      size = dim0[0]*dim1[0];
    }

while (i >= size) {
index = i - size;
j++;
size += arr[j];

      if (dim == 1) {
        q = j % length;
        size += dim0[q]*dim1[q];
      }
      else {
        q = j;
        size += arr[j];
      }
}
if (j == 0) {
index = i;
}
HBTensor<float> t(tensors_p[j]);
    if (j >= length && dim == 1) {
      index = index + dim0[q]*dim1[q];
    }

HBTensor<float> t(tensors_p[q]);
result(i) = t(index);
});

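For orientation, the mapping a dim-1 concat has to realize for row-major 2-D inputs: flat output index i lands in row r = i // C_total, and the column offset within that row selects the source tensor. A reference sketch in Python (hypothetical helper, not a transcription of the loop above):

```python
def cat1_source(i, shapes):
    """Map flat output index i to (tensor_idx, flat_local_idx) for a
    dim-1 concat of row-major 2-D tensors shaped [(R, C0), (R, C1), ...]."""
    c_total = sum(c for _, c in shapes)
    r, col = divmod(i, c_total)
    for q, (_, c) in enumerate(shapes):
        if col < c:
            return q, r * c + col
        col -= c

# cat of (4, 2) and (4, 3) along dim 1 -> output shape (4, 5)
assert cat1_source(0, [(4, 2), (4, 3)]) == (0, 0)  # row 0, col 0 -> tensor 0
assert cat1_source(2, [(4, 2), (4, 3)]) == (1, 0)  # row 0, col 2 -> tensor 1
assert cat1_source(7, [(4, 2), (4, 3)]) == (1, 3)  # row 1, col 2 -> tensor 1
```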
42 changes: 42 additions & 0 deletions hammerblade/torch/kernel/kernel_tanhbackwards.cpp
@@ -0,0 +1,42 @@
//====================================================================
// tanh backward kernel
//====================================================================
// Computes the derivative of tanh from the saved forward output:
// the incoming tensor is already tanh(x), so the kernel returns
// 1 - tanh(x)^2. Used in RNN.
//
// Authors : Jack Weber
// Date : 05/07/2020
//
#include <kernel_common.hpp>
#include <cmath>

extern "C" {

//====================================================================
// tensorlib_tanh_backward
//====================================================================
// This is the tanh backward kernel for tensors with float elements.

__attribute__ ((noinline))
int tensorlib_tanh_backward(hb_tensor_t* t0_p, hb_tensor_t* t1_p)
{
  auto res = HBTensor<float>(t0_p);    // grad_output's buffer; the result lands here
  auto output = HBTensor<float>(t1_p); // saved forward output, i.e. tanh(x)

  bsg_cuda_print_stat_kernel_start();
  hb_tiled_foreach(
    [](float a) {
      // d/dx tanh(x) = 1 - tanh(x)^2; `a` is already tanh(x), so square
      // it directly instead of applying tanh a second time.
      // Note: grad_output is not multiplied in by this kernel.
      return 1.0f - a * a;
    },
    res, output);

bsg_cuda_print_stat_kernel_end();
return 0;
}
HB_EMUL_REG_KERNEL(tensorlib_tanh_backward, hb_tensor_t*, hb_tensor_t*)

} /* extern C */
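A quick CPU cross-check of the arithmetic the kernel targets, using PyTorch's autograd as the oracle:

```python
import torch

x = torch.randn(64, requires_grad=True)
y = torch.tanh(x)
grad_out = torch.randn(64)

(expected,) = torch.autograd.grad(y, x, grad_out)
manual = grad_out * (1 - y.detach().pow(2))  # grad_output * (1 - tanh(x)^2)
assert torch.allclose(expected, manual)
```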

53 changes: 45 additions & 8 deletions hammerblade/torch/tests/test_cat.py
@@ -15,60 +15,97 @@ def _test_torch_cat(x, y, z):
assert y_h.device == torch.device("hammerblade")
assert torch.allclose(y, y_h.cpu())

def _test_torch_cat1d(x, y, z):
    x_h = x.hammerblade()
    y_h = y.hammerblade()
    z_h = z.hammerblade()
    a = torch.cat([x, y, z], 1)
    a_h = torch.cat([x_h, y_h, z_h], 1)
    assert a_h.device == torch.device("hammerblade")
    assert torch.allclose(a, a_h.cpu())

def test_cat_1():
x = torch.ones(10)
_test_torch_cat(x, x, x)


def test_cat_1_dif_sizes():
x = torch.randn(3)
y = torch.randn(2)
z = torch.tensor([])
_test_torch_cat(x, y, z)
# _test_torch_cat1d(x, y, z)

def test_cat_2():
x = torch.randn(3, 4)
_test_torch_cat(x, x, x)
_test_torch_cat1d(x, x, x)

def test_cat_2_dif_sizes():
x = torch.randn(3, 4)
y = torch.randn(2, 4)
z = torch.randn(4, 4)
_test_torch_cat(x, y, z)

def test_cat1d_2_dif_sizes():
x = torch.randn(4, 2)
y = torch.randn(4, 3)
z = torch.randn(4, 4)
_test_torch_cat1d(x, y, z)

def test_cat_3():
x = torch.randn(3, 4, 5)
_test_torch_cat(x, x, x)
_test_torch_cat1d(x, x, x)

def test_cat_3_dif_sizes():
x = torch.randn(3, 4, 5)
y = torch.randn(2, 4, 5)
z = torch.randn(4, 4, 5)
_test_torch_cat(x, y, z)

def test_cat1d_3_dif_sizes():
x = torch.randn(4, 4, 5)
y = torch.randn(4, 3, 5)
z = torch.randn(4, 2, 5)
_test_torch_cat1d(x, y, z)


@settings(deadline=None)
@given(inputs=hu.tensors(n=3, min_dim=1, max_dim=3))
def test_cat_hypothesis(inputs):
@given(inputs=hu.tensors(n=3, min_dim=2, max_dim=3))
def test_cat1d_hypothesis(inputs):
x1 = torch.tensor(inputs[0])
x2 = torch.tensor(inputs[1])
x3 = torch.tensor(inputs[2])
_test_torch_cat(x1, x2, x3)
_test_torch_cat1d(x1, x2, x3)

def test_cat_error_1():
x = torch.randn(3, 4, 5, 2).hammerblade()
with pytest.raises(RuntimeError):
torch.cat([x, x, x], 0)

def test_cat_error_2():
x = torch.randn(3, 4).hammerblade()
with pytest.raises(RuntimeError):
torch.cat([x, x, x], 1)

def test_cat_error_3():
with pytest.raises(RuntimeError):
torch.cat([], 0)
with pytest.raises(RuntimeError):
torch.cat([], 1)

def test_cat_error_4():
x = torch.ones(2).hammerblade()
y = torch.randn(3, 4).hammerblade()
with pytest.raises(RuntimeError):
torch.cat([x, y], 0)

def test_cat_error_5():
    x = torch.ones(2, 3).hammerblade()
    y = torch.randn(3, 4, 5).hammerblade()
    with pytest.raises(RuntimeError):
        torch.cat([x, y], 1)

@settings(deadline=None)
@given(inputs=hu.tensors(n=3, min_dim=1, max_dim=3))
def test_cat_hypothesis(inputs):
x1 = torch.tensor(inputs[0])
x2 = torch.tensor(inputs[1])
x3 = torch.tensor(inputs[2])
_test_torch_cat(x1, x2, x3)
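For completeness, the invariant the error tests above rely on: inputs must agree in rank and in every dimension other than the concat dimension. A plain-CPU illustration:

```python
import torch

x = torch.ones(2, 3)
y = torch.randn(3, 4, 5)
try:
    torch.cat([x, y], 1)  # mismatched ranks -> RuntimeError
except RuntimeError as err:
    print("raised as expected:", err)
```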