From 4965bbb4f9b85af0d847deae044288652ab20bb9 Mon Sep 17 00:00:00 2001 From: Quan Anh Mai Date: Wed, 30 Oct 2024 16:43:55 +0700 Subject: [PATCH] Implement Vector API masked load --- .../compiler/asm/amd64/AMD64Assembler.java | 2 + .../meta/UnimplementedGraalIntrinsics.java | 8 ++-- .../lir/amd64/vector/AMD64VectorMove.java | 47 +++++++++++++++++++ .../lir/gen/ArithmeticLIRGeneratorTool.java | 5 ++ 4 files changed, 59 insertions(+), 3 deletions(-) diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/asm/amd64/AMD64Assembler.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/asm/amd64/AMD64Assembler.java index a29c52a72d2b..a4f667890d87 100644 --- a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/asm/amd64/AMD64Assembler.java +++ b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/asm/amd64/AMD64Assembler.java @@ -1616,6 +1616,8 @@ public static final class VexMoveOp extends VexGeneralMoveOp { // EVEX encoded instructions public static final VexMoveOp EVMOVDQA32 = new VexMoveOp("EVMOVDQA32", VMOVDQA32); public static final VexMoveOp EVMOVDQA64 = new VexMoveOp("EVMOVDQA64", VMOVDQA64); + public static final VexMoveOp EVMOVDQU8 = new VexMoveOp("EVMOVDQU8", VEXPrefixConfig.P_F2, VEXPrefixConfig.M_0F, VEXPrefixConfig.W0, 0x6F, 0x7F, VEXOpAssertion.AVX512BW_VL, EVEXTuple.FVM, VEXPrefixConfig.W0, true); + public static final VexMoveOp EVMOVDQU16 = new VexMoveOp("EVMOVDQU16", VEXPrefixConfig.P_F2, VEXPrefixConfig.M_0F, VEXPrefixConfig.W1, 0x6F, 0x7F, VEXOpAssertion.AVX512BW_VL, EVEXTuple.FVM, VEXPrefixConfig.W1, true); public static final VexMoveOp EVMOVDQU32 = new VexMoveOp("EVMOVDQU32", VMOVDQU32); public static final VexMoveOp EVMOVDQU64 = new VexMoveOp("EVMOVDQU64", VMOVDQU64); public static final VexMoveOp EVMOVAPS = new VexMoveOp("EVMOVAPS", VMOVAPS); diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/hotspot/meta/UnimplementedGraalIntrinsics.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/hotspot/meta/UnimplementedGraalIntrinsics.java index 199966c5180d..4f5a92b0b34c 100644 --- a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/hotspot/meta/UnimplementedGraalIntrinsics.java +++ b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/hotspot/meta/UnimplementedGraalIntrinsics.java @@ -96,8 +96,6 @@ public UnimplementedGraalIntrinsics(Architecture arch) { "jdk/internal/vm/vector/VectorSupport.compressExpandOp(ILjava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;ILjdk/internal/vm/vector/VectorSupport$Vector;Ljdk/internal/vm/vector/VectorSupport$VectorMask;Ljdk/internal/vm/vector/VectorSupport$CompressExpandOperation;)Ljdk/internal/vm/vector/VectorSupport$VectorPayload;", "jdk/internal/vm/vector/VectorSupport.indexPartiallyInUpperRange(Ljava/lang/Class;Ljava/lang/Class;IJJLjdk/internal/vm/vector/VectorSupport$IndexPartiallyInUpperRangeOperation;)Ljdk/internal/vm/vector/VectorSupport$VectorMask;", "jdk/internal/vm/vector/VectorSupport.indexVector(Ljava/lang/Class;Ljava/lang/Class;ILjdk/internal/vm/vector/VectorSupport$Vector;ILjdk/internal/vm/vector/VectorSupport$VectorSpecies;Ljdk/internal/vm/vector/VectorSupport$IndexOperation;)Ljdk/internal/vm/vector/VectorSupport$Vector;", - jdk == 21 ? "jdk/internal/vm/vector/VectorSupport.loadMasked(Ljava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;JLjdk/internal/vm/vector/VectorSupport$VectorMask;ILjava/lang/Object;JLjdk/internal/vm/vector/VectorSupport$VectorSpecies;Ljdk/internal/vm/vector/VectorSupport$LoadVectorMaskedOperation;)Ljdk/internal/vm/vector/VectorSupport$Vector;": - "jdk/internal/vm/vector/VectorSupport.loadMasked(Ljava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;JZLjdk/internal/vm/vector/VectorSupport$VectorMask;ILjava/lang/Object;JLjdk/internal/vm/vector/VectorSupport$VectorSpecies;Ljdk/internal/vm/vector/VectorSupport$LoadVectorMaskedOperation;)Ljdk/internal/vm/vector/VectorSupport$Vector;", "jdk/internal/vm/vector/VectorSupport.loadWithMap(Ljava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Class;Ljava/lang/Object;JLjdk/internal/vm/vector/VectorSupport$Vector;Ljdk/internal/vm/vector/VectorSupport$VectorMask;Ljava/lang/Object;I[IILjdk/internal/vm/vector/VectorSupport$VectorSpecies;Ljdk/internal/vm/vector/VectorSupport$LoadVectorOperationWithMap;)Ljdk/internal/vm/vector/VectorSupport$Vector;", "jdk/internal/vm/vector/VectorSupport.maybeRebox(Ljdk/internal/vm/vector/VectorSupport$VectorPayload;)Ljdk/internal/vm/vector/VectorSupport$VectorPayload;", jdk == 21 ? "jdk/internal/vm/vector/VectorSupport.storeMasked(Ljava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;JLjdk/internal/vm/vector/VectorSupport$Vector;Ljdk/internal/vm/vector/VectorSupport$VectorMask;Ljava/lang/Object;JLjdk/internal/vm/vector/VectorSupport$StoreVectorMaskedOperation;)V": @@ -154,9 +152,11 @@ public UnimplementedGraalIntrinsics(Architecture arch) { } if (jdk == 21) { + // @formatter:off // JDK-8325169 // handled by an intrinsic for StringUTF16.indexOfCharUnsafe add(ignore, "java/lang/StringUTF16.indexOfChar([BIII)I"); + // @formatter:on } if (jdk >= 24) { // Newly added by JDK-8338694. HotSpot runtime does not implement @@ -197,6 +197,8 @@ public UnimplementedGraalIntrinsics(Architecture arch) { "jdk/internal/vm/vector/VectorSupport.insert(Ljava/lang/Class;Ljava/lang/Class;ILjdk/internal/vm/vector/VectorSupport$Vector;IJLjdk/internal/vm/vector/VectorSupport$VecInsertOp;)Ljdk/internal/vm/vector/VectorSupport$Vector;", jdk == 21 ? "jdk/internal/vm/vector/VectorSupport.load(Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;JLjava/lang/Object;JLjdk/internal/vm/vector/VectorSupport$VectorSpecies;Ljdk/internal/vm/vector/VectorSupport$LoadOperation;)Ljdk/internal/vm/vector/VectorSupport$VectorPayload;": "jdk/internal/vm/vector/VectorSupport.load(Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;JZLjava/lang/Object;JLjdk/internal/vm/vector/VectorSupport$VectorSpecies;Ljdk/internal/vm/vector/VectorSupport$LoadOperation;)Ljdk/internal/vm/vector/VectorSupport$VectorPayload;", + jdk == 21 ? "jdk/internal/vm/vector/VectorSupport.loadMasked(Ljava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;JLjdk/internal/vm/vector/VectorSupport$VectorMask;ILjava/lang/Object;JLjdk/internal/vm/vector/VectorSupport$VectorSpecies;Ljdk/internal/vm/vector/VectorSupport$LoadVectorMaskedOperation;)Ljdk/internal/vm/vector/VectorSupport$Vector;": + "jdk/internal/vm/vector/VectorSupport.loadMasked(Ljava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;ILjava/lang/Object;JZLjdk/internal/vm/vector/VectorSupport$VectorMask;ILjava/lang/Object;JLjdk/internal/vm/vector/VectorSupport$VectorSpecies;Ljdk/internal/vm/vector/VectorSupport$LoadVectorMaskedOperation;)Ljdk/internal/vm/vector/VectorSupport$Vector;", "jdk/internal/vm/vector/VectorSupport.maskReductionCoerced(ILjava/lang/Class;Ljava/lang/Class;ILjdk/internal/vm/vector/VectorSupport$VectorMask;Ljdk/internal/vm/vector/VectorSupport$VectorMaskOp;)J", "jdk/internal/vm/vector/VectorSupport.rearrangeOp(Ljava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;ILjdk/internal/vm/vector/VectorSupport$Vector;Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;Ljdk/internal/vm/vector/VectorSupport$VectorMask;Ljdk/internal/vm/vector/VectorSupport$VectorRearrangeOp;)Ljdk/internal/vm/vector/VectorSupport$Vector;", "jdk/internal/vm/vector/VectorSupport.reductionCoerced(ILjava/lang/Class;Ljava/lang/Class;Ljava/lang/Class;ILjdk/internal/vm/vector/VectorSupport$Vector;Ljdk/internal/vm/vector/VectorSupport$VectorMask;Ljdk/internal/vm/vector/VectorSupport$ReductionOperation;)J", @@ -212,7 +214,7 @@ public UnimplementedGraalIntrinsics(Architecture arch) { if (jdk >= 24) { add(enterprise, // @formatter:off - "jdk/internal/vm/vector/VectorSupport.wrapShuffleIndexes(Ljava/lang/Class;Ljava/lang/Class;Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;ILjdk/internal/vm/vector/VectorSupport$WrapShuffleIndexesOperation;)Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;"); + "jdk/internal/vm/vector/VectorSupport.wrapShuffleIndexes(Ljava/lang/Class;Ljava/lang/Class;Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;ILjdk/internal/vm/vector/VectorSupport$WrapShuffleIndexesOperation;)Ljdk/internal/vm/vector/VectorSupport$VectorShuffle;"); // @formatter:on } } diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/vector/AMD64VectorMove.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/vector/AMD64VectorMove.java index 183d25206de1..8b82dba82c74 100644 --- a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/vector/AMD64VectorMove.java +++ b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/vector/AMD64VectorMove.java @@ -32,13 +32,17 @@ import static jdk.graal.compiler.asm.amd64.AMD64Assembler.VexMoveOp.VMOVUPD; import static jdk.graal.compiler.asm.amd64.AMD64Assembler.VexMoveOp.VMOVUPS; import static jdk.graal.compiler.asm.amd64.AMD64Assembler.VexRVMOp.VXORPD; +import static jdk.graal.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.B0; +import static jdk.graal.compiler.asm.amd64.AMD64BaseAssembler.EVEXPrefixConfig.Z1; import static jdk.vm.ci.code.ValueUtil.asRegister; import static jdk.vm.ci.code.ValueUtil.isRegister; import static jdk.vm.ci.code.ValueUtil.isStackSlot; import jdk.graal.compiler.asm.amd64.AMD64Address; import jdk.graal.compiler.asm.amd64.AMD64Assembler.AMD64SIMDInstructionEncoding; +import jdk.graal.compiler.asm.amd64.AMD64Assembler.VexMaskedMoveOp; import jdk.graal.compiler.asm.amd64.AMD64Assembler.VexMoveOp; +import jdk.graal.compiler.asm.amd64.AMD64Assembler.VexOp; import jdk.graal.compiler.asm.amd64.AMD64BaseAssembler; import jdk.graal.compiler.asm.amd64.AMD64MacroAssembler; import jdk.graal.compiler.asm.amd64.AVXKind; @@ -298,6 +302,49 @@ public void emitMemAccess(AMD64MacroAssembler masm) { } } + public static final class VectorMaskedLoadOp extends AMD64LIRInstruction { + public static final LIRInstructionClass TYPE = LIRInstructionClass.create(VectorMaskedLoadOp.class); + + protected final AVXSize size; + protected final VexOp op; + + @Def({OperandFlag.REG}) protected AllocatableValue result; + @Use({OperandFlag.COMPOSITE}) protected AMD64AddressValue address; + @Use({OperandFlag.REG}) protected AllocatableValue mask; + @State protected LIRFrameState state; + + public VectorMaskedLoadOp(AVXSize size, VexMaskedMoveOp op, AllocatableValue result, AMD64AddressValue address, AllocatableValue mask, LIRFrameState state) { + super(TYPE); + this.size = size; + this.op = op; + this.result = result; + this.address = address; + this.mask = mask; + this.state = state; + } + + public VectorMaskedLoadOp(AVXSize size, VexMoveOp op, AllocatableValue result, AMD64AddressValue address, AllocatableValue mask, LIRFrameState state) { + super(TYPE); + this.size = size; + this.op = op; + this.result = result; + this.address = address; + this.mask = mask; + this.state = state; + } + + @Override + public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) { + GraalError.guarantee(state == null, "Implicit exception not supported yet"); + if (op instanceof VexMaskedMoveOp o) { + o.emit(masm, size, asRegister(result), asRegister(mask), address.toAddress(masm)); + } else { + VexMoveOp o = (VexMoveOp) op; + o.emit(masm, size, asRegister(result), address.toAddress(masm), asRegister(mask), Z1, B0); + } + } + } + public static class VectorStoreOp extends VectorMemOp { public static final LIRInstructionClass TYPE = LIRInstructionClass.create(VectorStoreOp.class); diff --git a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/gen/ArithmeticLIRGeneratorTool.java b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/gen/ArithmeticLIRGeneratorTool.java index b3270a02e44c..573c9bb34805 100644 --- a/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/gen/ArithmeticLIRGeneratorTool.java +++ b/compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/gen/ArithmeticLIRGeneratorTool.java @@ -127,6 +127,11 @@ default Value emitZeroExtend(Value inputVal, int fromBits, int toBits) { Variable emitLoad(LIRKind kind, Value address, LIRFrameState state, MemoryOrderMode memoryOrder, MemoryExtendKind extendKind); + @SuppressWarnings("unused") + default Variable emitMaskedLoad(LIRKind kind, Value address, Value mask, LIRFrameState state, MemoryOrderMode memoryOrder) { + throw GraalError.unimplemented("No specialized implementation available"); // ExcludeFromJacocoGeneratedReport + } + void emitStore(ValueKind kind, Value address, Value input, LIRFrameState state, MemoryOrderMode memoryOrder); /**