Merge from 'xmain' to 'xmain-web' (KhronosGroup#3)

vmaksimo · Oct 27, 2020 · 135755e · 135755e
2 parents 1b49094 + 75c3720
commit 135755e
Show file tree

Hide file tree

Showing 2 changed files with 195 additions and 6 deletions.
diff --git a/lib/SPIRV/SPIRVRegularizeLLVM.cpp b/lib/SPIRV/SPIRVRegularizeLLVM.cpp
@@ -85,6 +85,24 @@ class SPIRVRegularizeLLVM : public ModulePass {
   /// @spirv.llvm_memset_* and replace it with @llvm.memset.
   void lowerMemset(MemSetInst *MSI);
 
+  /// No SPIR-V counterpart for @llvm.fshl.* intrinsic. It will be lowered
+  /// to a newly generated @spirv.llvm_fshl_* function.
+  ///
+  /// Conceptually, FSHL:
+  /// 1. concatenates the two ints, the first one being the more significant;
+  /// 2. shifts the resulting double-sized int left by the shift amount, which
+  ///    is taken modulo the bit width of the original integers;
+  /// 3. returns the most significant bits of the shifted result,
+  ///    the number of bits being equal to the size of the original integers.
+  /// If FSHL operates on a vector type instead, the same operations are
+  /// performed for each set of corresponding vector elements.
+  ///
+  /// The actual implementation algorithm will be slightly different for
+  /// simplification purposes.
+  void lowerFunnelShiftLeft(IntrinsicInst *FSHLIntrinsic);
+  void buildFunnelShiftLeftFunc(Function *FSHLFunc);
+
+  static std::string lowerLLVMIntrinsicName(IntrinsicInst *II);
+
   static char ID;
 
 private:
@@ -94,17 +112,22 @@ class SPIRVRegularizeLLVM : public ModulePass {
 
 char SPIRVRegularizeLLVM::ID = 0;
 
-void SPIRVRegularizeLLVM::lowerMemset(MemSetInst *MSI) {
-  if (isa<Constant>(MSI->getValue()) && isa<ConstantInt>(MSI->getLength()))
-    return; // To be handled in LLVMToSPIRV::transIntrinsicInst
-  Function *IntrinsicFunc = MSI->getCalledFunction();
+std::string SPIRVRegularizeLLVM::lowerLLVMIntrinsicName(IntrinsicInst *II) {
+  Function *IntrinsicFunc = II->getCalledFunction();
   assert(IntrinsicFunc && "Missing function");
   std::string FuncName = IntrinsicFunc->getName().str();
   std::replace(FuncName.begin(), FuncName.end(), '.', '_');
   FuncName = "spirv." + FuncName;
+  return FuncName; // e.g. "llvm.fshl.i32" -> "spirv.llvm_fshl_i32"
+}
+
+void SPIRVRegularizeLLVM::lowerMemset(MemSetInst *MSI) {
+  if (isa<Constant>(MSI->getValue()) && isa<ConstantInt>(MSI->getLength()))
+    return; // To be handled in LLVMToSPIRV::transIntrinsicInst
+
+  std::string FuncName = lowerLLVMIntrinsicName(MSI);
   if (MSI->isVolatile())
     FuncName += ".volatile";
-
   // Redirect @llvm.memset.* call to @spirv.llvm_memset_*
   Function *F = M->getFunction(FuncName);
   if (F) {
@@ -137,6 +160,53 @@ void SPIRVRegularizeLLVM::lowerMemset(MemSetInst *MSI) {
   return;
 }
 
+void SPIRVRegularizeLLVM::buildFunnelShiftLeftFunc(Function *FSHLFunc) {
+  if (!FSHLFunc->empty())
+    return;
+
+  auto *RotateBB = BasicBlock::Create(M->getContext(), "rotate", FSHLFunc);
+  IRBuilder<> Builder(RotateBB);
+  Type *Ty = FSHLFunc->getReturnType();
+  // Emit the body as straight-line code in the single "rotate" block. In the
+  // comments, "int" is used interchangeably with "vector of int elements";
+  // for vector types the bit-width constant below is splatted to match.
+  FixedVectorType *VectorTy = dyn_cast<FixedVectorType>(Ty);
+  Type *IntTy = VectorTy ? VectorTy->getElementType() : Ty;
+  unsigned BitWidth = IntTy->getIntegerBitWidth();
+  ConstantInt *BitWidthConstant = Builder.getInt({BitWidth, BitWidth});
+  Value *BitWidthForInsts =
+      VectorTy ? Builder.CreateVectorSplat(VectorTy->getNumElements(),
+                                           BitWidthConstant)
+               : BitWidthConstant;
+  auto *RotateModVal =
+      Builder.CreateURem(/*Rotate*/ FSHLFunc->getArg(2), BitWidthForInsts);
+  // Shift the more significant int (argument 0) left; the "rotate" number of
+  // bits will be 0-filled on the right as a result of this regular shift.
+  auto *ShiftLeft = Builder.CreateShl(FSHLFunc->getArg(0), RotateModVal);
+  // The "rotate" MSBs of the second int must land in the low bits zeroed by
+  // the previous shift, so compute (bitsize - "rotate") as the shift amount.
+  // NOTE(review): this equals the full bitsize when "rotate" mod bitsize is 0.
+  auto *SubRotateVal = Builder.CreateSub(BitWidthForInsts, RotateModVal);
+  // Zero-filling right shift; a full-bitsize lshr is poison in LLVM - confirm.
+  auto *ShiftRight = Builder.CreateLShr(FSHLFunc->getArg(1), SubRotateVal);
+  // A bitwise OR of the two shifted ints yields the final result.
+  auto *FunnelShiftRes = Builder.CreateOr(ShiftLeft, ShiftRight);
+  Builder.CreateRet(FunnelShiftRes);
+}
+
+void SPIRVRegularizeLLVM::lowerFunnelShiftLeft(IntrinsicInst *FSHLIntrinsic) {
+  // Get a separate function with the intrinsic's signature - otherwise, we'd
+  // have to rework the CFG of the current one. Its body is built only while
+  // it is still empty; this call site is then retargeted to the new function.
+  FunctionType *FSHLFuncTy = FSHLIntrinsic->getFunctionType();
+  Type *FSHLRetTy = FSHLFuncTy->getReturnType();
+  const std::string FuncName = lowerLLVMIntrinsicName(FSHLIntrinsic);
+  Function *FSHLFunc =
+      getOrCreateFunction(M, FSHLRetTy, FSHLFuncTy->params(), FuncName);
+  buildFunnelShiftLeftFunc(FSHLFunc);
+  FSHLIntrinsic->setCalledFunction(FSHLFunc);
+}
+
 bool SPIRVRegularizeLLVM::runOnModule(Module &Module) {
   M = &Module;
   Ctx = &M->getContext();
@@ -170,8 +240,11 @@ bool SPIRVRegularizeLLVM::regularize() {
           Function *CF = Call->getCalledFunction();
           if (CF && CF->isIntrinsic()) {
             removeFnAttr(Call, Attribute::NoUnwind);
-            if (auto *MSI = dyn_cast<MemSetInst>(Call))
+            auto *II = cast<IntrinsicInst>(Call);
+            if (auto *MSI = dyn_cast<MemSetInst>(II))
               lowerMemset(MSI);
+            else if (II->getIntrinsicID() == Intrinsic::fshl)
+              lowerFunnelShiftLeft(II);
           }
         }
 

diff --git a/test/llvm-intrinsics/fshl.ll b/test/llvm-intrinsics/fshl.ll
@@ -0,0 +1,116 @@
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+; RUN: llvm-spirv %t.bc -o %t.spv
+; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-LLVM
+
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
+target triple = "spir64-unknown-unknown"
+
+; CHECK-SPIRV: Name [[NAME_FSHL_FUNC_32:[0-9]+]] "spirv.llvm_fshl_i32"
+; CHECK-SPIRV: Name [[NAME_FSHL_FUNC_16:[0-9]+]] "spirv.llvm_fshl_i16"
+; CHECK-SPIRV: Name [[NAME_FSHL_FUNC_VEC_INT_16:[0-9]+]] "spirv.llvm_fshl_v2i16"
+; CHECK-SPIRV: TypeInt [[TYPE_INT_32:[0-9]+]] 32 0
+; CHECK-SPIRV: TypeInt [[TYPE_INT_16:[0-9]+]] 16 0
+; CHECK-SPIRV-DAG: Constant [[TYPE_INT_32]] [[CONST_ROTATE_32:[0-9]+]] 8
+; CHECK-SPIRV-DAG: Constant [[TYPE_INT_16]] [[CONST_ROTATE_16:[0-9]+]] 8
+; CHECK-SPIRV-DAG: Constant [[TYPE_INT_32]] [[CONST_TYPE_SIZE_32:[0-9]+]] 32
+; CHECK-SPIRV: TypeFunction [[TYPE_ORIG_FUNC_32:[0-9]+]] [[TYPE_INT_32]] [[TYPE_INT_32]] [[TYPE_INT_32]]
+; CHECK-SPIRV: TypeFunction [[TYPE_FSHL_FUNC_32:[0-9]+]] [[TYPE_INT_32]] [[TYPE_INT_32]] [[TYPE_INT_32]] [[TYPE_INT_32]]
+; CHECK-SPIRV: TypeFunction [[TYPE_ORIG_FUNC_16:[0-9]+]] [[TYPE_INT_16]] [[TYPE_INT_16]] [[TYPE_INT_16]]
+; CHECK-SPIRV: TypeFunction [[TYPE_FSHL_FUNC_16:[0-9]+]] [[TYPE_INT_16]] [[TYPE_INT_16]] [[TYPE_INT_16]] [[TYPE_INT_16]]
+; CHECK-SPIRV: TypeVector [[TYPE_VEC_INT_16:[0-9]+]] [[TYPE_INT_16]] 2
+; CHECK-SPIRV: TypeFunction [[TYPE_ORIG_FUNC_VEC_INT_16:[0-9]+]] [[TYPE_VEC_INT_16]] [[TYPE_VEC_INT_16]] [[TYPE_VEC_INT_16]]
+; CHECK-SPIRV: TypeFunction [[TYPE_FSHL_FUNC_VEC_INT_16:[0-9]+]] [[TYPE_VEC_INT_16]] [[TYPE_VEC_INT_16]] [[TYPE_VEC_INT_16]] [[TYPE_VEC_INT_16]]
+; CHECK-SPIRV: ConstantComposite [[TYPE_VEC_INT_16]] [[CONST_ROTATE_VEC_INT_16:[0-9]+]] [[CONST_ROTATE_16]] [[CONST_ROTATE_16]]
+
+; On LLVM level, we'll check that the intrinsics were generated again in reverse translation,
+; replacing the SPIR-V level implementations.
+; CHECK-LLVM-NOT: declare {{.*}} @spirv.llvm_fshl_{{.*}}
+
+; Function Attrs: nounwind readnone
+; CHECK-SPIRV: Function [[TYPE_INT_32]] {{[0-9]+}} {{[0-9]+}} [[TYPE_ORIG_FUNC_32]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_32]] [[X:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_32]] [[Y:[0-9]+]]
+define spir_func i32 @Test_i32(i32 %x, i32 %y) local_unnamed_addr #0 {
+entry:
+  ; CHECK-SPIRV: FunctionCall [[TYPE_INT_32]] [[CALL_32_X_Y:[0-9]+]] [[NAME_FSHL_FUNC_32]] [[X]] [[Y]] [[CONST_ROTATE_32]]
+  ; CHECK-LLVM: call i32 @llvm.fshl.i32
+  %0 = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 8)
+  ; CHECK-SPIRV: FunctionCall [[TYPE_INT_32]] [[CALL_32_Y_X:[0-9]+]] [[NAME_FSHL_FUNC_32]] [[Y]] [[X]] [[CONST_ROTATE_32]]
+  ; CHECK-LLVM: call i32 @llvm.fshl.i32
+  %1 = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 8)
+  ; CHECK-SPIRV: IAdd [[TYPE_INT_32]] [[ADD_32:[0-9]+]] [[CALL_32_X_Y]] [[CALL_32_Y_X]]
+  %sum = add i32 %0, %1
+  ; CHECK-SPIRV: ReturnValue [[ADD_32]]
+  ret i32 %sum
+}
+
+; CHECK-SPIRV: Function [[TYPE_INT_32]] [[NAME_FSHL_FUNC_32]] {{[0-9]+}} [[TYPE_FSHL_FUNC_32]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_32]] [[X_FSHL:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_32]] [[Y_FSHL:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_32]] [[ROT:[0-9]+]]
+
+; CHECK-SPIRV: UMod [[TYPE_INT_32]] [[ROTATE_MOD_SIZE:[0-9]+]] [[ROT]] [[CONST_TYPE_SIZE_32]]
+; CHECK-SPIRV: ShiftLeftLogical [[TYPE_INT_32]] [[X_SHIFT_LEFT:[0-9]+]] [[X_FSHL]] [[ROTATE_MOD_SIZE]]
+; CHECK-SPIRV: ISub [[TYPE_INT_32]] [[NEG_ROTATE:[0-9]+]] [[CONST_TYPE_SIZE_32]] [[ROTATE_MOD_SIZE]]
+; CHECK-SPIRV: ShiftRightLogical [[TYPE_INT_32]] [[Y_SHIFT_RIGHT:[0-9]+]] [[Y_FSHL]] [[NEG_ROTATE]]
+; CHECK-SPIRV: BitwiseOr [[TYPE_INT_32]] [[FSHL_RESULT:[0-9]+]] [[X_SHIFT_LEFT]] [[Y_SHIFT_RIGHT]]
+; CHECK-SPIRV: ReturnValue [[FSHL_RESULT]]
+
+; Function Attrs: nounwind readnone
+; CHECK-SPIRV: Function [[TYPE_INT_16]] {{[0-9]+}} {{[0-9]+}} [[TYPE_ORIG_FUNC_16]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_16]] [[X:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_16]] [[Y:[0-9]+]]
+define spir_func i16 @Test_i16(i16 %x, i16 %y) local_unnamed_addr #0 {
+entry:
+  ; CHECK-SPIRV: FunctionCall [[TYPE_INT_16]] [[CALL_16:[0-9]+]] [[NAME_FSHL_FUNC_16]] [[X]] [[Y]] [[CONST_ROTATE_16]]
+  ; CHECK-LLVM: call i16 @llvm.fshl.i16
+  %0 = call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 8)
+  ; CHECK-SPIRV: ReturnValue [[CALL_16]]
+  ret i16 %0
+}
+
+; Just check that the function for i16 was generated as such - we've checked the logic for another type.
+; CHECK-SPIRV: Function [[TYPE_INT_16]] [[NAME_FSHL_FUNC_16]] {{[0-9]+}} [[TYPE_FSHL_FUNC_16]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_16]] [[X_FSHL:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_16]] [[Y_FSHL:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_INT_16]] [[ROT:[0-9]+]]
+
+; CHECK-SPIRV: Function [[TYPE_VEC_INT_16]] {{[0-9]+}} {{[0-9]+}} [[TYPE_ORIG_FUNC_VEC_INT_16]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_VEC_INT_16]] [[X:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_VEC_INT_16]] [[Y:[0-9]+]]
+define spir_func <2 x i16> @Test_v2i16(<2 x i16> %x, <2 x i16> %y) local_unnamed_addr #0 {
+entry:
+  ; CHECK-SPIRV: FunctionCall [[TYPE_VEC_INT_16]] [[CALL_VEC_INT_16:[0-9]+]] [[NAME_FSHL_FUNC_VEC_INT_16]] [[X]] [[Y]] [[CONST_ROTATE_VEC_INT_16]]
+  ; CHECK-LLVM: call <2 x i16> @llvm.fshl.v2i16
+  %0 = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> <i16 8, i16 8>)
+  ; CHECK-SPIRV: ReturnValue [[CALL_VEC_INT_16]]
+  ret <2 x i16> %0
+}
+
+; Just check that the function for v2i16 was generated as such - we've checked the logic for another type.
+; CHECK-SPIRV: Function [[TYPE_VEC_INT_16]] [[NAME_FSHL_FUNC_VEC_INT_16]] {{[0-9]+}} [[TYPE_FSHL_FUNC_VEC_INT_16]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_VEC_INT_16]] [[X_FSHL:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_VEC_INT_16]] [[Y_FSHL:[0-9]+]]
+; CHECK-SPIRV: FunctionParameter [[TYPE_VEC_INT_16]] [[ROT:[0-9]+]]
+
+; Function Attrs: nounwind readnone speculatable willreturn
+declare i32 @llvm.fshl.i32(i32, i32, i32) #1
+
+; Function Attrs: nounwind readnone speculatable willreturn
+declare i16 @llvm.fshl.i16(i16, i16, i16) #1
+
+; Function Attrs: nounwind readnone speculatable willreturn
+declare <2 x i16> @llvm.fshl.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) #1
+
+attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone speculatable willreturn }
+
+!llvm.module.flags = !{!0}
+!opencl.ocl.version = !{!1}
+!opencl.spir.version = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, i32 0}
+!2 = !{i32 1, i32 2}