Skip to content

Commit

Permalink
Implemented NEON UDOT (by vector) instruction with tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
FinnWilkinson committed Nov 6, 2024
1 parent 9b13a5c commit f9a759f
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 0 deletions.
27 changes: 27 additions & 0 deletions src/include/simeng/arch/aarch64/helpers/neon.hh
Original file line number Diff line number Diff line change
Expand Up @@ -951,6 +951,33 @@ RegisterValue vecUzp(srcValContainer& sourceValues, bool isUzp1) {
return {out, 256};
}

/** Helper function for NEON instructions with the format `udot vd.s, vn.b,
 * vm.b` (unsigned dot product, by vector).
 * D represents the number of elements in the output vector to be updated
 * (i.e. for vd.2s D = 2). Only 2 or 4 are valid.
 * For each of the D 32-bit elements, the four corresponding unsigned bytes of
 * vn and vm are multiplied pairwise and the products are added to the current
 * value of that element in vd. Accumulation is done in `uint32_t`, so it wraps
 * modulo 2^32.
 * `sourceValues` supplies vd, vn and vm at indices 0, 1 and 2 respectively.
 * `metadata` is not read by this helper; it is accepted so the call matches
 * that of `vecUdot_byElement`.
 * Returns correctly formatted RegisterValue. */
template <int D>
RegisterValue vecUdot(
    srcValContainer& sourceValues,
    const simeng::arch::aarch64::InstructionMetadata& metadata) {
  // Check D is a valid value. The message is passed as the second argument of
  // static_assert so that the compiler reports it as the diagnostic text.
  static_assert(D == 2 || D == 4,
                "D must be either 2 or 4 to align with vd.2s or vd.4s.");

  const uint32_t* vd = sourceValues[0].getAsVector<uint32_t>();
  const uint8_t* vn = sourceValues[1].getAsVector<uint8_t>();
  const uint8_t* vm = sourceValues[2].getAsVector<uint8_t>();

  uint32_t out[D] = {0};
  for (int i = 0; i < D; i++) {
    // Each 32-bit output element consumes one group of four consecutive bytes
    // from each of the two byte vectors.
    const uint8_t* nBytes = vn + (4 * i);
    const uint8_t* mBytes = vm + (4 * i);

    // Start from the existing destination value (udot accumulates into vd).
    uint32_t acc = vd[i];
    for (int j = 0; j < 4; j++) {
      // Widen before multiplying so the product is computed as unsigned 32-bit.
      acc += static_cast<uint32_t>(nBytes[j]) *
             static_cast<uint32_t>(mBytes[j]);
    }
    out[i] = acc;
  }
  // NOTE(review): 256 is presumably the RegisterValue capacity in bytes, with
  // bytes beyond `out` zero-filled - confirm against RegisterValue.
  return {out, 256};
}

/** Helper function for NEON instructions with the format `udot vd.s, vn.b,
* vm.4b[index]`.
* D represents the number of elements in the output vector to be updated (i.e.
Expand Down
4 changes: 4 additions & 0 deletions src/lib/arch/aarch64/Instruction_execute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6550,6 +6550,10 @@ void Instruction::execute() {
metadata_, VL_bits);
break;
}
case Opcode::AArch64_UDOTv16i8: { // udot vd.4s, vn.16b, vm.16b
results_[0] = vecUdot<4>(sourceValues_, metadata_);
break;
}
case Opcode::AArch64_UDOTlanev16i8: { // udot vd.4s, vn.16b, vm.4b[index]
results_[0] = vecUdot_byElement<4>(sourceValues_, metadata_);
break;
Expand Down
24 changes: 24 additions & 0 deletions test/regression/aarch64/instructions/neon.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3681,6 +3681,30 @@ TEST_P(InstNeon, udot) {
CHECK_NEON(3, uint32_t, {0xd328, 0x288e8, 0x27e25, 0x2b87f});
CHECK_NEON(4, uint32_t, {0xc333, 0x2731b, 0x0, 0x0});
CHECK_NEON(5, uint32_t, {0x1fe2, 0x8e62, 0xad7e, 0xb52f});

// udot by vector
initialHeapData_.resize(128);
heap64 = reinterpret_cast<uint64_t*>(initialHeapData_.data());
heap64[0] = 0xDEADBEEFFFFFFFFF;
heap64[1] = 0x01234567ABBACAFE;
heap64[2] = 0xFEDCBA98FFFFFFFF;
heap64[3] = 0xDEADCAFEABBABEEF;
RUN_AARCH64(R"(
# Get heap address
mov x0, #0
mov x8, #214
svc #0
ldr q0, [x0]
ldr q1, [x0, #16]
movi v2.4s, #3
udot v2.4s, v1.16b, v0.16b
)");
CHECK_NEON(0, uint64_t, {0xDEADBEEFFFFFFFFF, 0x01234567ABBACAFE});
CHECK_NEON(1, uint64_t, {0xFEDCBA98FFFFFFFF, 0xDEADCAFEABBABEEF});
CHECK_NEON(2, uint32_t, {0x3F807, 0x288E7, 0x27C6E, 0xB52C});
}

TEST_P(InstNeon, uzp) {
Expand Down

0 comments on commit f9a759f

Please sign in to comment.