Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactoring of THCNumerics #678

Closed
wants to merge 23 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions init.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#include "THCCachingHostAllocator.h"
#include "THCSleep.h"
#include "THCTensorRandom.h"
#include "THCHalf.h" // for CUDA_HALF_TENSOR

extern void cutorch_CudaByteStorage_init(lua_State* L);
extern void cutorch_CudaCharStorage_init(lua_State* L);
Expand Down Expand Up @@ -721,7 +720,6 @@ static int cutorch_getMemoryUsage(lua_State *L) {

static int cutorch_setDevice(lua_State *L)
{
THCState *state = cutorch_getstate(L);
int device = (int)luaL_checknumber(L, 1)-1;
THCudaCheck(cudaSetDevice(device));
return 0;
Expand Down Expand Up @@ -1091,6 +1089,10 @@ int luaopen_libcutorch(lua_State *L)
#endif
lua_setfield(L, -2, "hasHalf");

/* true fp16 vs pseudo-fp16 mode: this one is per device */
lua_pushboolean(L, THC_nativeHalfInstructions(state));
lua_setfield(L, -2, "hasHalfInstructions");

/* store gpu driver version in field */
int driverVersion;
THCudaCheck(cudaDriverGetVersion(&driverVersion));
Expand Down
63 changes: 58 additions & 5 deletions lib/THC/THCHalf.h
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
#ifndef THC_HALF_CONVERSION_INC
#define THC_HALF_CONVERSION_INC
# define THC_HALF_CONVERSION_INC

#include "THCGeneral.h"
#include "cuda.h"
#include "cuda_runtime.h"
#include "cublas_v2.h"
#include "cuda_fp16.h"

/* We compile with CudaHalfTensor support if we have this: */
#if CUDA_VERSION >= 7050 || CUDA_HAS_FP16
#define CUDA_HALF_TENSOR 1
# define CUDA_HALF_TENSOR 1
#endif

#ifdef CUDA_HALF_TENSOR

#include <cuda_fp16.h>
#include "THCGeneral.h"
#include "THHalf.h"

#include <stdint.h>

THC_EXTERNC void THCFloat2Half(THCState *state, half *out, float *in, ptrdiff_t len);
Expand All @@ -24,6 +29,54 @@ THC_API int THC_nativeHalfInstructions(THCState *state);
/* Check for performant native fp16 support on the current device */
THC_API int THC_fastHalfInstructions(THCState *state);

#endif /* CUDA_HALF_TENSOR */
# if defined (__CUDA_ARCH__)
/* Device compilation pass: use the intrinsic functions that cuda_fp16.h
   defines for device code only. */
/* The macro argument is parenthesized before the cast so that an expression
   argument (e.g. `a / b`) is converted as a whole instead of having the
   cast bind to its first operand only. */
#  define THC_FLOAT_TO_HALF(x) __float2half((float)(x))
#  define THC_HALF_TO_FLOAT(x) __half2float(x)
/* Qualifiers applied to the inline conversion helpers declared with THC_DECL. */
#  define THC_DECL __host__ __device__ __forceinline__
# else
/* Host compilation: use the host conversion functions. */
/* Same parenthesization as the device variant, see above. */
#  define THC_FLOAT_TO_HALF(x) THC_float2half((float)(x))
#  define THC_HALF_TO_FLOAT(x) THC_half2float(x)
#  define THC_DECL inline
# endif

/* Define CUDA_HALF_INSTRUCTIONS when __CUDA_ARCH__ is exactly 600, or is 620
   or greater. Values in between (e.g. 610) do not get the macro, and neither
   does a pass where __CUDA_ARCH__ is undefined (it evaluates to 0 in #if). */
#if __CUDA_ARCH__ == 600 || __CUDA_ARCH__ >= 620
# define CUDA_HALF_INSTRUCTIONS 1
#endif

#if defined (__cplusplus__) || defined (__CUDACC__)

/// `half` has some type conversion issues associated with it, since it
/// is a struct without a constructor/implicit conversion constructor.
/// We use this to convert scalar values to the given type that the
/// tensor expects.

template <typename In, typename Out>
struct ScalarConvert {
static THC_DECL Out to(const In& v) { return Out(v); }
};

template <typename Out>
struct ScalarConvert<half, Out> {
static THC_DECL Out to(const half& v) {
return (Out) THC_HALF_TO_FLOAT(v);
}
};

template <typename In>
struct ScalarConvert<In, half> {
static THC_DECL half to(const In& v) {
return THC_FLOAT_TO_HALF(v);
}
};

template <>
struct ScalarConvert<half, half> {
static THC_DECL const half& to(const half& v) {
return v;
}
};
# endif /* __cplusplus__ */
# endif /* CUDA_HALF_TENSOR */
#endif /* THC_HALF_CONVERSION_INC */
Loading