kernel bitwise_or #158

Open · wants to merge 2 commits into master
19 changes: 19 additions & 0 deletions aten/src/ATen/native/hammerblade/Or.cpp
@@ -0,0 +1,19 @@
#include <cmath>
#include <ATen/Dispatch.h>
#include <ATen/hammerblade/HammerBladeContext.h>
#include <ATen/native/TensorIterator.h>
#include <ATen/native/ReduceOps.h>
#include <ATen/native/hammerblade/Offload.h>

namespace at { namespace native {

Tensor or_kernel_hb(const Tensor& self, const Tensor& other) {
  TORCH_CHECK(self.numel() == other.numel(), "The size of two tensors should match.");
  // TORCH_CHECK(self.scalar_type() == ScalarType::Int || self.scalar_type() == ScalarType::Bool, "HammerBlade or is implemented for Int and Bool only");
  // TORCH_CHECK(other.scalar_type() == ScalarType::Int || other.scalar_type() == ScalarType::Bool, "HammerBlade or is implemented for Int and Bool only");
Contributor:
Same as and: I think self.scalar_type() == kInt will work. Maybe you need at::kInt ...

  Tensor result = at::empty_like(self, self.options());
  hb_offload_kernel(result, self, other, "tensorlib_or");
  return result;
}

}} // namespace at::native
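For reference, a minimal sketch of the dtype guard the contributor suggests for the commented-out checks above, assuming the same pattern as the and kernel; at::kInt and at::kBool are the ATen ScalarType shorthands, and the message strings are reused from the commented-out lines:

  // Hypothetical replacement for the commented-out checks (untested sketch):
  TORCH_CHECK(self.scalar_type() == at::kInt || self.scalar_type() == at::kBool,
              "HammerBlade or is implemented for Int and Bool only");
  TORCH_CHECK(other.scalar_type() == at::kInt || other.scalar_type() == at::kBool,
              "HammerBlade or is implemented for Int and Bool only");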
1 change: 1 addition & 0 deletions aten/src/ATen/native/native_functions.yaml
@@ -4009,6 +4009,7 @@
  dispatch:
    CPU: legacy::cpu::_th_or
    CUDA: legacy::cuda::_th_or
    HammerBlade: or_kernel_hb

- func: __ior__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
  variants: method
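With the HammerBlade dispatch entry in place, the | operator on hammerblade tensors should route to or_kernel_hb. A rough usage sketch, assuming the entry modified above is the one backing the | operator, which is what the tests in this PR exercise:

import torch

a = torch.ones(4, 5, dtype=torch.int).hammerblade()
b = torch.ones(4, 5, dtype=torch.int).hammerblade()
c = a | b        # dispatched to or_kernel_hb through the yaml entry above
print(c.cpu())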
37 changes: 37 additions & 0 deletions hammerblade/torch/kernel/kernel_or.cpp
@@ -0,0 +1,37 @@
//========================================================================
// Element-wise or kernel
//========================================================================
//
// Authors : Janice Wei
// Date : 10/08/2020

#include <kernel_common.hpp>
#include <cstdint>

extern "C" {

__attribute__ ((noinline)) int tensorlib_or(
    hb_tensor_t* t0_p,
    hb_tensor_t* t1_p,
    hb_tensor_t* t2_p) {
  auto res = HBTensor<int>(t0_p);
Contributor:
Similar to and, we need to handle booleans and test for it

  auto input1 = HBTensor<int>(t1_p);
  auto input2 = HBTensor<int>(t2_p);

  bsg_cuda_print_stat_kernel_start();

  hb_tiled_foreach(
    [](int a, int b) {
      return a | b;
    },
    res, input1, input2);

  bsg_cuda_print_stat_kernel_end();

  g_barrier.sync();
  return 0;
}

HB_EMUL_REG_KERNEL(tensorlib_or, hb_tensor_t*, hb_tensor_t*, hb_tensor_t*)

}
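Following up on the contributor's note about booleans: one possible direction is a second entry point inside the same extern "C" block that instantiates the element-wise body for bool. This is only a sketch; whether HBTensor<bool> is supported by the HammerBlade runtime, and the kernel name tensorlib_or_bool, are assumptions, and the host function in Or.cpp would also have to select the kernel name based on self.scalar_type():

// Hypothetical bool variant (assumes HBTensor<bool> exists; untested).
__attribute__ ((noinline)) int tensorlib_or_bool(
    hb_tensor_t* t0_p,
    hb_tensor_t* t1_p,
    hb_tensor_t* t2_p) {
  auto res    = HBTensor<bool>(t0_p);
  auto input1 = HBTensor<bool>(t1_p);
  auto input2 = HBTensor<bool>(t2_p);

  bsg_cuda_print_stat_kernel_start();

  hb_tiled_foreach(
    [](bool a, bool b) {
      return a || b;   // for bools, logical or matches bitwise or
    },
    res, input1, input2);

  bsg_cuda_print_stat_kernel_end();

  g_barrier.sync();
  return 0;
}

HB_EMUL_REG_KERNEL(tensorlib_or_bool, hb_tensor_t*, hb_tensor_t*, hb_tensor_t*)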
57 changes: 57 additions & 0 deletions hammerblade/torch/tests/test_or.py
@@ -0,0 +1,57 @@
"""
Tests of the or kernel
Authors : Janice Wei
Date : 10/08/2020
"""

import torch
import random
from hypothesis import given, settings
from .hypothesis_test_util import HypothesisUtil as hu

torch.manual_seed(42)
random.seed(42)

# ------------------------------------------------------------------------
# test of x1 | x2
# ------------------------------------------------------------------------

def _test_or(x1, x2):
    h1 = x1.hammerblade()
    h2 = x2.hammerblade()
    assert h1 is not x1
    assert h2 is not x2
    y_c = x1 | x2
    y_h = h1 | h2
    assert y_h.device == torch.device("hammerblade")
    assert torch.allclose(y_c, y_h.cpu())

# ------------------------------------------------------------------------
# tests of or kernel with integer elements
# ------------------------------------------------------------------------

def test_or_1():
    x = torch.ones(1, 10, dtype=torch.int)
    _test_or(x, x)

def test_or_2():
    x1 = torch.ones(4, 5, dtype=torch.int)
    x2 = torch.ones(4, 5, dtype=torch.int)
    _test_or(x1, x2)

def test_or_3():
    x = torch.randint(-2 ** 30, 2 ** 30 - 1, (1, 128)).to(torch.int32)
    y = torch.randint(-2 ** 30, 2 ** 30 - 1, (1, 128)).to(torch.int32)
    _test_or(x, y)

def test_or_4():
    x = torch.randint(-2 ** 30, 2 ** 30 - 1, (16, 32)).to(torch.int32)
    y = torch.randint(-2 ** 30, 2 ** 30 - 1, (16, 32)).to(torch.int32)
    _test_or(x, y)

@settings(deadline=None)
@given(inputs=hu.tensors(n=2))
def test_or_hypothesis(inputs):
    x1 = torch.tensor(inputs[0]).to(torch.int32)
    x2 = torch.tensor(inputs[1]).to(torch.int32)
    _test_or(x1, x2)
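
If bool support is added as requested in the review, a test along these lines could cover it. This is a sketch only: it assumes the kernel accepts torch.bool tensors, and it uses torch.equal for an exact element-wise comparison:

# ------------------------------------------------------------------------
# hypothetical test of or kernel with boolean elements (assumes bool support)
# ------------------------------------------------------------------------

def test_or_bool():
    x1 = torch.tensor([[True, False], [False, True]])
    x2 = torch.tensor([[True, True], [False, False]])
    h1 = x1.hammerblade()
    h2 = x2.hammerblade()
    y_c = x1 | x2
    y_h = h1 | h2
    assert y_h.device == torch.device("hammerblade")
    assert torch.equal(y_c, y_h.cpu())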