Skip to content

Commit

Permalink
[HIPIFY][bfp16] Bfloat16 Precision Intrinsics support - Part 2 - final
Browse files Browse the repository at this point in the history
+ Updated the regenerated hipify-perl and CUDA_Device_API_supported_by_HIP.md accordingly

[ToDo]
+ There are plenty of overridden `bfloat16` functions which are not supported yet, so take them into account first when implementing ROCm#662
  • Loading branch information
emankov committed Oct 22, 2022
1 parent f31b29a commit 167e52e
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 0 deletions.
28 changes: 28 additions & 0 deletions bin/hipify-perl
Original file line number Diff line number Diff line change
Expand Up @@ -5397,10 +5397,15 @@ sub warnUnsupportedDeviceFunctions {
"__vabsdiffs2",
"__vabs4",
"__vabs2",
"__ushort_as_bfloat16",
"__ushort2bfloat16_rz",
"__ushort2bfloat16_ru",
"__ushort2bfloat16_rn",
"__ushort2bfloat16_rd",
"__ull2bfloat16_rz",
"__ull2bfloat16_ru",
"__ull2bfloat16_rn",
"__ull2bfloat16_rd",
"__uint2bfloat16_rz",
"__uint2bfloat16_ru",
"__uint2bfloat16_rn",
Expand All @@ -5413,6 +5418,7 @@ sub warnUnsupportedDeviceFunctions {
"__signbitl",
"__signbitf",
"__signbit",
"__short_as_bfloat16",
"__short2bfloat16_rz",
"__short2bfloat16_ru",
"__short2bfloat16_rn",
Expand All @@ -5436,6 +5442,13 @@ sub warnUnsupportedDeviceFunctions {
"__nv_cvt_double2_to_fp8x2",
"__nv_cvt_bfloat16raw_to_fp8",
"__nv_cvt_bfloat16raw2_to_fp8x2",
"__lows2bfloat162",
"__low2bfloat162",
"__low2bfloat16",
"__ll2bfloat16_rz",
"__ll2bfloat16_ru",
"__ll2bfloat16_rn",
"__ll2bfloat16_rd",
"__ldlu",
"__ldcv",
"__isnanl",
Expand All @@ -5460,9 +5473,13 @@ sub warnUnsupportedDeviceFunctions {
"__hmax2_nan",
"__hmax2",
"__hmax",
"__highs2bfloat162",
"__high2bfloat162",
"__high2bfloat16",
"__hfma_relu",
"__hfma2_relu",
"__hcmadd",
"__halves2bfloat162",
"__hadd_rn",
"__hadd2_rn",
"__fsub_rz",
Expand Down Expand Up @@ -5520,10 +5537,16 @@ sub warnUnsupportedDeviceFunctions {
"__dadd_ru",
"__dadd_rd",
"__brkpt",
"__bfloat16_as_ushort",
"__bfloat16_as_short",
"__bfloat162ushort_rz",
"__bfloat162ushort_ru",
"__bfloat162ushort_rn",
"__bfloat162ushort_rd",
"__bfloat162ull_rz",
"__bfloat162ull_ru",
"__bfloat162ull_rn",
"__bfloat162ull_rd",
"__bfloat162uint_rz",
"__bfloat162uint_ru",
"__bfloat162uint_rn",
Expand All @@ -5532,11 +5555,16 @@ sub warnUnsupportedDeviceFunctions {
"__bfloat162short_ru",
"__bfloat162short_rn",
"__bfloat162short_rd",
"__bfloat162ll_rz",
"__bfloat162ll_ru",
"__bfloat162ll_rn",
"__bfloat162ll_rd",
"__bfloat162int_rz",
"__bfloat162int_ru",
"__bfloat162int_rn",
"__bfloat162int_rd",
"__bfloat162float",
"__bfloat162bfloat162",
"__bfloat1622float2",
"_Pow_int"
)
Expand Down
28 changes: 28 additions & 0 deletions doc/markdown/CUDA_Device_API_supported_by_HIP.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,16 @@
|`__assertfail`| | | |`__assertfail`|1.9.0| | | |
|`__ballot`| | | |`__ballot`|1.6.0| | | |
|`__bfloat1622float2`|11.0| | | | | | | |
|`__bfloat162bfloat162`|11.0| | | | | | | |
|`__bfloat162float`|11.0| | | | | | | |
|`__bfloat162int_rd`|11.0| | | | | | | |
|`__bfloat162int_rn`|11.0| | | | | | | |
|`__bfloat162int_ru`|11.0| | | | | | | |
|`__bfloat162int_rz`|11.0| | | | | | | |
|`__bfloat162ll_rd`|11.0| | | | | | | |
|`__bfloat162ll_rn`|11.0| | | | | | | |
|`__bfloat162ll_ru`|11.0| | | | | | | |
|`__bfloat162ll_rz`|11.0| | | | | | | |
|`__bfloat162short_rd`|11.0| | | | | | | |
|`__bfloat162short_rn`|11.0| | | | | | | |
|`__bfloat162short_ru`|11.0| | | | | | | |
Expand All @@ -24,10 +29,16 @@
|`__bfloat162uint_rn`|11.0| | | | | | | |
|`__bfloat162uint_ru`|11.0| | | | | | | |
|`__bfloat162uint_rz`|11.0| | | | | | | |
|`__bfloat162ull_rd`|11.0| | | | | | | |
|`__bfloat162ull_rn`|11.0| | | | | | | |
|`__bfloat162ull_ru`|11.0| | | | | | | |
|`__bfloat162ull_rz`|11.0| | | | | | | |
|`__bfloat162ushort_rd`|11.0| | | | | | | |
|`__bfloat162ushort_rn`|11.0| | | | | | | |
|`__bfloat162ushort_ru`|11.0| | | | | | | |
|`__bfloat162ushort_rz`|11.0| | | | | | | |
|`__bfloat16_as_short`|11.0| | | | | | | |
|`__bfloat16_as_ushort`|11.0| | | | | | | |
|`__brev`| | | |`__brev`|1.6.0| | | |
|`__brevll`| | | |`__brevll`|1.6.0| | | |
|`__brkpt`| | | | | | | | |
Expand Down Expand Up @@ -200,6 +211,7 @@
|`__half2ushort_rz`| | | |`__half2ushort_rz`|1.6.0| | | |
|`__half_as_short`| | | |`__half_as_short`|1.6.0| | | |
|`__half_as_ushort`| | | |`__half_as_ushort`|1.6.0| | | |
|`__halves2bfloat162`|11.0| | | | | | | |
|`__halves2half2`| | | |`__halves2half2`|1.6.0| | | |
|`__hbeq2`| | | |`__hbeq2`|1.6.0| | | |
|`__hbequ2`| | | |`__hbequ2`|1.9.0| | | |
Expand Down Expand Up @@ -233,9 +245,12 @@
|`__hgt2`| | | |`__hgt2`|1.6.0| | | |
|`__hgtu`| | | |`__hgtu`|1.9.0| | | |
|`__hgtu2`| | | |`__hgtu2`|1.9.0| | | |
|`__high2bfloat16`|11.0| | | | | | | |
|`__high2bfloat162`|11.0| | | | | | | |
|`__high2float`| | | |`__high2float`|1.6.0| | | |
|`__high2half`| | | |`__high2half`|1.6.0| | | |
|`__high2half2`| | | |`__high2half2`|1.6.0| | | |
|`__highs2bfloat162`|11.0| | | | | | | |
|`__highs2half2`| | | |`__highs2half2`|1.6.0| | | |
|`__hiloint2double`| | | |`__hiloint2double`|1.6.0| | | |
|`__hisinf`| | | |`__hisinf`|1.6.0| | | |
Expand Down Expand Up @@ -301,6 +316,10 @@
|`__ldcv`|11.0| | | | | | | |
|`__ldg`| | | |`__ldg`|1.6.0| | | |
|`__ldlu`|11.0| | | | | | | |
|`__ll2bfloat16_rd`|11.0| | | | | | | |
|`__ll2bfloat16_rn`|11.0| | | | | | | |
|`__ll2bfloat16_ru`|11.0| | | | | | | |
|`__ll2bfloat16_rz`|11.0| | | | | | | |
|`__ll2double_rd`| | | |`__ll2double_rd`|1.6.0| | | |
|`__ll2double_rn`| | | |`__ll2double_rn`|1.6.0| | | |
|`__ll2double_ru`| | | |`__ll2double_ru`|1.6.0| | | |
Expand All @@ -317,10 +336,13 @@
|`__log2f`| | | |`__log2f`|1.6.0| | | |
|`__logf`| | | |`__logf`|1.6.0| | | |
|`__longlong_as_double`| | | |`__longlong_as_double`|1.6.0| | | |
|`__low2bfloat16`|11.0| | | | | | | |
|`__low2bfloat162`|11.0| | | | | | | |
|`__low2float`| | | |`__low2float`|1.6.0| | | |
|`__low2half`| | | |`__low2half`|1.6.0| | | |
|`__low2half2`| | | |`__low2half2`|1.6.0| | | |
|`__lowhigh2highlow`| | | |`__lowhigh2highlow`|1.6.0| | | |
|`__lows2bfloat162`|11.0| | | | | | | |
|`__lows2half2`| | | |`__lows2half2`|1.6.0| | | |
|`__mul24`| | | |`__mul24`|1.6.0| | | |
|`__mul64hi`| | | |`__mul64hi`|1.6.0| | | |
Expand Down Expand Up @@ -362,6 +384,7 @@
|`__short2half_rn`| | | |`__short2half_rn`|1.6.0| | | |
|`__short2half_ru`| | | |`__short2half_ru`|1.6.0| | | |
|`__short2half_rz`| | | |`__short2half_rz`|1.6.0| | | |
|`__short_as_bfloat16`|11.0| | | | | | | |
|`__short_as_half`| | | |`__short_as_half`|1.9.0| | | |
|`__signbit`| | | | | | | | |
|`__signbitf`| | | | | | | | |
Expand Down Expand Up @@ -396,6 +419,10 @@
|`__uint2half_ru`| | | |`__uint2half_ru`|1.6.0| | | |
|`__uint2half_rz`| | | |`__uint2half_rz`|1.6.0| | | |
|`__uint_as_float`| | | |`__uint_as_float`|1.6.0| | | |
|`__ull2bfloat16_rd`|11.0| | | | | | | |
|`__ull2bfloat16_rn`|11.0| | | | | | | |
|`__ull2bfloat16_ru`|11.0| | | | | | | |
|`__ull2bfloat16_rz`|11.0| | | | | | | |
|`__ull2double_rd`| | | |`__ull2double_rd`|1.6.0| | | |
|`__ull2double_rn`| | | |`__ull2double_rn`|1.6.0| | | |
|`__ull2double_ru`| | | |`__ull2double_ru`|1.6.0| | | |
Expand All @@ -421,6 +448,7 @@
|`__ushort2half_rn`| | | |`__ushort2half_rn`|1.6.0| | | |
|`__ushort2half_ru`| | | |`__ushort2half_ru`|1.6.0| | | |
|`__ushort2half_rz`| | | |`__ushort2half_rz`|1.6.0| | | |
|`__ushort_as_bfloat16`|11.0| | | | | | | |
|`__ushort_as_half`| | | |`__ushort_as_half`|1.6.0| | | |
|`__vabs2`| | | | | | | | |
|`__vabs4`| | | | | | | | |
Expand Down
56 changes: 56 additions & 0 deletions src/CUDA2HIP_Device_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -741,6 +741,34 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DEVICE_FUNCTION_MAP {
{"__ushort2bfloat16_rz", {"__ushort2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__ushort2bfloat16_rd", {"__ushort2bfloat16_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__ushort2bfloat16_ru", {"__ushort2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__bfloat162ull_rn", {"__bfloat162ull_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__bfloat162ull_rz", {"__bfloat162ull_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__bfloat162ull_rd", {"__bfloat162ull_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__bfloat162ull_ru", {"__bfloat162ull_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__ull2bfloat16_rn", {"__ull2bfloat16_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__ull2bfloat16_rz", {"__ull2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__ull2bfloat16_rd", {"__ull2bfloat16_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__ull2bfloat16_ru", {"__ull2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__bfloat162ll_rn", {"__bfloat162ll_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__bfloat162ll_rz", {"__bfloat162ll_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__bfloat162ll_rd", {"__bfloat162ll_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__bfloat162ll_ru", {"__bfloat162ll_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__ll2bfloat16_rn", {"__ll2bfloat16_rn", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__ll2bfloat16_rz", {"__ll2bfloat16_rz", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__ll2bfloat16_rd", {"__ll2bfloat16_rd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__ll2bfloat16_ru", {"__ll2bfloat16_ru", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__bfloat162bfloat162", {"__bfloat162bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__lows2bfloat162", {"__lows2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__highs2bfloat162", {"__highs2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__high2bfloat16", {"__high2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__low2bfloat16", {"__low2bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__halves2bfloat162", {"__halves2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__low2bfloat162", {"__low2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__high2bfloat162", {"__high2bfloat162", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__bfloat16_as_short", {"__bfloat16_as_short", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__bfloat16_as_ushort", {"__bfloat16_as_ushort", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__short_as_bfloat16", {"__short_as_bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
{"__ushort_as_bfloat16", {"__ushort_as_bfloat16", "", CONV_DEVICE_FUNC, API_RUNTIME, 1, UNSUPPORTED}},
// atomic functions
{"atomicAdd", {"atomicAdd", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
{"atomicAdd_system", {"atomicAdd_system", "", CONV_DEVICE_FUNC, API_RUNTIME, 1}},
Expand Down Expand Up @@ -848,6 +876,34 @@ const std::map<llvm::StringRef, cudaAPIversions> CUDA_DEVICE_FUNCTION_VER_MAP {
{"__ushort2bfloat16_rz", {CUDA_110, CUDA_0, CUDA_0 }},
{"__ushort2bfloat16_rd", {CUDA_110, CUDA_0, CUDA_0 }},
{"__ushort2bfloat16_ru", {CUDA_110, CUDA_0, CUDA_0 }},
{"__bfloat162ull_rn", {CUDA_110, CUDA_0, CUDA_0 }},
{"__bfloat162ull_rz", {CUDA_110, CUDA_0, CUDA_0 }},
{"__bfloat162ull_rd", {CUDA_110, CUDA_0, CUDA_0 }},
{"__bfloat162ull_ru", {CUDA_110, CUDA_0, CUDA_0 }},
{"__ull2bfloat16_rn", {CUDA_110, CUDA_0, CUDA_0 }},
{"__ull2bfloat16_rz", {CUDA_110, CUDA_0, CUDA_0 }},
{"__ull2bfloat16_rd", {CUDA_110, CUDA_0, CUDA_0 }},
{"__ull2bfloat16_ru", {CUDA_110, CUDA_0, CUDA_0 }},
{"__bfloat162ll_rn", {CUDA_110, CUDA_0, CUDA_0 }},
{"__bfloat162ll_rz", {CUDA_110, CUDA_0, CUDA_0 }},
{"__bfloat162ll_rd", {CUDA_110, CUDA_0, CUDA_0 }},
{"__bfloat162ll_ru", {CUDA_110, CUDA_0, CUDA_0 }},
{"__ll2bfloat16_rn", {CUDA_110, CUDA_0, CUDA_0 }},
{"__ll2bfloat16_rz", {CUDA_110, CUDA_0, CUDA_0 }},
{"__ll2bfloat16_rd", {CUDA_110, CUDA_0, CUDA_0 }},
{"__ll2bfloat16_ru", {CUDA_110, CUDA_0, CUDA_0 }},
{"__bfloat162bfloat162", {CUDA_110, CUDA_0, CUDA_0 }},
{"__lows2bfloat162", {CUDA_110, CUDA_0, CUDA_0 }},
{"__highs2bfloat162", {CUDA_110, CUDA_0, CUDA_0 }},
{"__high2bfloat16", {CUDA_110, CUDA_0, CUDA_0 }},
{"__low2bfloat16", {CUDA_110, CUDA_0, CUDA_0 }},
{"__halves2bfloat162", {CUDA_110, CUDA_0, CUDA_0 }},
{"__low2bfloat162", {CUDA_110, CUDA_0, CUDA_0 }},
{"__high2bfloat162", {CUDA_110, CUDA_0, CUDA_0 }},
{"__bfloat16_as_short", {CUDA_110, CUDA_0, CUDA_0 }},
{"__bfloat16_as_ushort", {CUDA_110, CUDA_0, CUDA_0 }},
{"__short_as_bfloat16", {CUDA_110, CUDA_0, CUDA_0 }},
{"__ushort_as_bfloat16", {CUDA_110, CUDA_0, CUDA_0 }},
{"__hcmadd", {CUDA_111, CUDA_0, CUDA_0 }},
{"__hadd2_rn", {CUDA_116, CUDA_0, CUDA_0 }},
{"__hsub2_rn", {CUDA_116, CUDA_0, CUDA_0 }},
Expand Down

0 comments on commit 167e52e

Please sign in to comment.