From 371b6af943dd11e4501e099d2e06f414d7e606e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=CE=B6eh=20Matt?= <5415177+ZehMatt@users.noreply.github.com> Date: Tue, 18 Jun 2024 15:34:15 +0300 Subject: [PATCH 1/4] Refactor the encoder to use the table for encode info --- zasm/src/zasm/src/encoder/encoder.cpp | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/zasm/src/zasm/src/encoder/encoder.cpp b/zasm/src/zasm/src/encoder/encoder.cpp index db6e909..579dfd9 100644 --- a/zasm/src/zasm/src/encoder/encoder.cpp +++ b/zasm/src/zasm/src/encoder/encoder.cpp @@ -60,8 +60,8 @@ namespace zasm data[ZYDIS_MNEMONIC_JBE] = EncodeVariantsInfo{ true, 2, 6 }; data[ZYDIS_MNEMONIC_JCXZ] = EncodeVariantsInfo{ true, 2, -1 }; data[ZYDIS_MNEMONIC_JECXZ] = EncodeVariantsInfo{ true, 2, -1 }; - data[ZYDIS_MNEMONIC_JKNZD] = EncodeVariantsInfo{ true, 2, -1 }; - data[ZYDIS_MNEMONIC_JKZD] = EncodeVariantsInfo{ true, 2, -1 }; + data[ZYDIS_MNEMONIC_JKNZD] = EncodeVariantsInfo{ true, 2, 5 }; + data[ZYDIS_MNEMONIC_JKZD] = EncodeVariantsInfo{ true, 2, 5 }; data[ZYDIS_MNEMONIC_JRCXZ] = EncodeVariantsInfo{ true, 2, -1 }; data[ZYDIS_MNEMONIC_JL] = EncodeVariantsInfo{ true, 2, 6 }; data[ZYDIS_MNEMONIC_JLE] = EncodeVariantsInfo{ true, 2, 6 }; @@ -194,16 +194,17 @@ namespace zasm return ErrorCode::None; } - static int64_t getTemporaryRel(EncoderState& state) noexcept + static int64_t getTemporaryRel(EncoderState& state, const EncodeVariantsInfo& encodeInfo) noexcept { auto* ctx = state.ctx; - std::int64_t kTempRel = kTemporaryRel32Value; + std::int64_t kTempRel = 0; - // NOTE: Workaround for some instructions that only accept rel8 - if (state.req.mnemonic == ZYDIS_MNEMONIC_JCXZ || state.req.mnemonic == ZYDIS_MNEMONIC_JECXZ - || state.req.mnemonic == ZYDIS_MNEMONIC_JKNZD || state.req.mnemonic == ZYDIS_MNEMONIC_LOOP - || state.req.mnemonic == ZYDIS_MNEMONIC_LOOPE || state.req.mnemonic == ZYDIS_MNEMONIC_LOOPNE) + if (encodeInfo.canEncodeRel32()) + { + kTempRel = kTemporaryRel32Value; + } + else if (encodeInfo.canEncodeRel8()) { kTempRel = kTemporaryRel8Value; } @@ -217,9 +218,10 @@ namespace zasm auto* ctx = state.ctx; auto desiredBranchType = ZydisBranchType::ZYDIS_BRANCH_TYPE_NONE; - // Initially a temporary placeholder. Make sure this is within rel32 if a - // context is provided. - std::int64_t immValue = getTemporaryRel(state); + const auto& encodeInfo = getEncodeVariantInfo(state.req.mnemonic); + + // Initially a temporary placeholder. + std::int64_t immValue = getTemporaryRel(state, encodeInfo); std::optional labelVA; if (ctx != nullptr && !isLabelExternal(ctx->program, src.getId())) @@ -232,7 +234,6 @@ namespace zasm } // Check if this operand is used as the control flow target. - const auto& encodeInfo = getEncodeVariantInfo(state.req.mnemonic); if (state.operandIndex == 0 && encodeInfo.isControlFlow) { const auto targetAddress = labelVA.has_value() ? *labelVA : immValue; From 08924765407ef81ed8de9dc7efe73f9d6052d5c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=CE=B6eh=20Matt?= <5415177+ZehMatt@users.noreply.github.com> Date: Tue, 18 Jun 2024 15:34:36 +0300 Subject: [PATCH 2/4] Update the tests --- tests/src/tests/tests.serialization.cpp | 77 ++++++++++++++++++++++--- 1 file changed, 70 insertions(+), 7 deletions(-) diff --git a/tests/src/tests/tests.serialization.cpp b/tests/src/tests/tests.serialization.cpp index 7c0cb05..788b807 100644 --- a/tests/src/tests/tests.serialization.cpp +++ b/tests/src/tests/tests.serialization.cpp @@ -1229,13 +1229,14 @@ namespace zasm::tests std::string("Error at node \"and rbp, 0x123456789abcdf\" with id 0: Impossible instruction")); } - TEST(SerializationTests, TestSerializationJecxzBad) + template + static void TestSerializationLabelRangeBad(MachineMode mode, TMnemonic mnemonic, const char* mnemonicName) { - Program program(MachineMode::AMD64); + Program program(mode); x86::Assembler a(program); auto labelLoop = a.createLabel(); - ASSERT_EQ(a.jecxz(labelLoop), ErrorCode::None); + ASSERT_EQ(a.emit(mnemonic, labelLoop), ErrorCode::None); ASSERT_EQ(a.dd(0, 256), ErrorCode::None); ASSERT_EQ(a.bind(labelLoop), ErrorCode::None); @@ -1243,16 +1244,18 @@ namespace zasm::tests auto res = serializer.serialize(program, 0x140015000); ASSERT_EQ(res, ErrorCode::AddressOutOfRange); - ASSERT_EQ(res.getErrorMessage(), std::string("Error at node \"jecxz L0\" with id 0: Label out of range for operand 0")); + const auto errMsg = std::string("Error at node \"") + mnemonicName + + " L0\" with id 0: Label out of range for operand 0"; + ASSERT_EQ(res.getErrorMessage(), errMsg); } - TEST(SerializationTests, TestSerializationJecxzGood) + template static void TestSerializationLabelRangeGood(MachineMode mode, TMnemonic mnemonic) { - Program program(MachineMode::AMD64); + Program program(mode); x86::Assembler a(program); auto labelLoop = a.createLabel(); - ASSERT_EQ(a.jecxz(labelLoop), ErrorCode::None); + ASSERT_EQ(a.emit(mnemonic, labelLoop), ErrorCode::None); ASSERT_EQ(a.dd(0), ErrorCode::None); ASSERT_EQ(a.bind(labelLoop), ErrorCode::None); @@ -1261,6 +1264,66 @@ namespace zasm::tests ASSERT_EQ(res, ErrorCode::None); } + TEST(SerializationTests, TestSerializationJcxzBad) + { + TestSerializationLabelRangeBad(MachineMode::I386, x86::Mnemonic::Jcxz, "jcxz"); + } + + TEST(SerializationTests, TestSerializationJcxzGood) + { + TestSerializationLabelRangeGood(MachineMode::I386, x86::Mnemonic::Jcxz); + } + + TEST(SerializationTests, TestSerializationJecxzBad) + { + TestSerializationLabelRangeBad(MachineMode::AMD64, x86::Mnemonic::Jecxz, "jecxz"); + } + + TEST(SerializationTests, TestSerializationJecxzGood) + { + TestSerializationLabelRangeGood(MachineMode::AMD64, x86::Mnemonic::Jecxz); + } + + TEST(SerializationTests, TestSerializationJrcxzBad) + { + TestSerializationLabelRangeBad(MachineMode::AMD64, x86::Mnemonic::Jrcxz, "jrcxz"); + } + + TEST(SerializationTests, TestSerializationJrcxzGood) + { + TestSerializationLabelRangeGood(MachineMode::AMD64, x86::Mnemonic::Jrcxz); + } + + TEST(SerializationTests, TestSerializationLoopBad) + { + TestSerializationLabelRangeBad(MachineMode::AMD64, x86::Mnemonic::Loop, "loop"); + } + + TEST(SerializationTests, TestSerializationLoopGood) + { + TestSerializationLabelRangeGood(MachineMode::AMD64, x86::Mnemonic::Loop); + } + + TEST(SerializationTests, TestSerializationLoopeBad) + { + TestSerializationLabelRangeBad(MachineMode::AMD64, x86::Mnemonic::Loope, "loope"); + } + + TEST(SerializationTests, TestSerializationLoopeGood) + { + TestSerializationLabelRangeGood(MachineMode::AMD64, x86::Mnemonic::Loope); + } + + TEST(SerializationTests, TestSerializationLoopneBad) + { + TestSerializationLabelRangeBad(MachineMode::AMD64, x86::Mnemonic::Loopne, "loopne"); + } + + TEST(SerializationTests, TestSerializationLoopneGood) + { + TestSerializationLabelRangeGood(MachineMode::AMD64, x86::Mnemonic::Loopne); + } + TEST(SerializationTests, TestBadMemoryDisplacement) { Program program(MachineMode::AMD64); From b38d9f27238c5c1e8bd24cef4f2f5ab15114ae2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=CE=B6eh=20Matt?= <5415177+ZehMatt@users.noreply.github.com> Date: Tue, 18 Jun 2024 16:34:14 +0300 Subject: [PATCH 3/4] Give the encode table an explicit operand index for control flow target --- zasm/src/zasm/src/encoder/encoder.cpp | 55 +++++++++++++-------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/zasm/src/zasm/src/encoder/encoder.cpp b/zasm/src/zasm/src/encoder/encoder.cpp index 579dfd9..9e2c0b0 100644 --- a/zasm/src/zasm/src/encoder/encoder.cpp +++ b/zasm/src/zasm/src/encoder/encoder.cpp @@ -38,6 +38,7 @@ namespace zasm bool isControlFlow{}; std::int8_t encodeSizeRel8{ -1 }; std::int8_t encodeSizeRel32{ -1 }; + std::int8_t cfOperandIndex{ -1 }; constexpr bool canEncodeRel8() const noexcept { @@ -55,32 +56,30 @@ namespace zasm std::array data{}; // NOLINTBEGIN(cppcoreguidelines-avoid-magic-numbers, readability-magic-numbers) - data[ZYDIS_MNEMONIC_JMP] = EncodeVariantsInfo{ true, 2, 5 }; - data[ZYDIS_MNEMONIC_JB] = EncodeVariantsInfo{ true, 2, 6 }; - data[ZYDIS_MNEMONIC_JBE] = EncodeVariantsInfo{ true, 2, 6 }; - data[ZYDIS_MNEMONIC_JCXZ] = EncodeVariantsInfo{ true, 2, -1 }; - data[ZYDIS_MNEMONIC_JECXZ] = EncodeVariantsInfo{ true, 2, -1 }; - data[ZYDIS_MNEMONIC_JKNZD] = EncodeVariantsInfo{ true, 2, 5 }; - data[ZYDIS_MNEMONIC_JKZD] = EncodeVariantsInfo{ true, 2, 5 }; - data[ZYDIS_MNEMONIC_JRCXZ] = EncodeVariantsInfo{ true, 2, -1 }; - data[ZYDIS_MNEMONIC_JL] = EncodeVariantsInfo{ true, 2, 6 }; - data[ZYDIS_MNEMONIC_JLE] = EncodeVariantsInfo{ true, 2, 6 }; - data[ZYDIS_MNEMONIC_JNB] = EncodeVariantsInfo{ true, 2, 6 }; - data[ZYDIS_MNEMONIC_JNBE] = EncodeVariantsInfo{ true, 2, 6 }; - data[ZYDIS_MNEMONIC_JNL] = EncodeVariantsInfo{ true, 2, 6 }; - data[ZYDIS_MNEMONIC_JNLE] = EncodeVariantsInfo{ true, 2, 6 }; - data[ZYDIS_MNEMONIC_JNO] = EncodeVariantsInfo{ true, 2, 6 }; - data[ZYDIS_MNEMONIC_JNP] = EncodeVariantsInfo{ true, 2, 6 }; - data[ZYDIS_MNEMONIC_JNS] = EncodeVariantsInfo{ true, 2, 6 }; - data[ZYDIS_MNEMONIC_JNZ] = EncodeVariantsInfo{ true, 2, 6 }; - data[ZYDIS_MNEMONIC_JO] = EncodeVariantsInfo{ true, 2, 6 }; - data[ZYDIS_MNEMONIC_JP] = EncodeVariantsInfo{ true, 2, 6 }; - data[ZYDIS_MNEMONIC_JS] = EncodeVariantsInfo{ true, 2, 6 }; - data[ZYDIS_MNEMONIC_JZ] = EncodeVariantsInfo{ true, 2, 6 }; - data[ZYDIS_MNEMONIC_LOOP] = EncodeVariantsInfo{ true, 2, -1 }; - data[ZYDIS_MNEMONIC_LOOPE] = EncodeVariantsInfo{ true, 2, -1 }; - data[ZYDIS_MNEMONIC_LOOPNE] = EncodeVariantsInfo{ true, 2, -1 }; - data[ZYDIS_MNEMONIC_CALL] = EncodeVariantsInfo{ true, -1, 5 }; + data[ZYDIS_MNEMONIC_JMP] = EncodeVariantsInfo{ true, 2, 5, 0 }; + data[ZYDIS_MNEMONIC_JB] = EncodeVariantsInfo{ true, 2, 6, 0 }; + data[ZYDIS_MNEMONIC_JBE] = EncodeVariantsInfo{ true, 2, 6, 0 }; + data[ZYDIS_MNEMONIC_JCXZ] = EncodeVariantsInfo{ true, 2, -1, 0 }; + data[ZYDIS_MNEMONIC_JECXZ] = EncodeVariantsInfo{ true, 2, -1, 0 }; + data[ZYDIS_MNEMONIC_JRCXZ] = EncodeVariantsInfo{ true, 2, -1, 0 }; + data[ZYDIS_MNEMONIC_JL] = EncodeVariantsInfo{ true, 2, 6, 0 }; + data[ZYDIS_MNEMONIC_JLE] = EncodeVariantsInfo{ true, 2, 6, 0 }; + data[ZYDIS_MNEMONIC_JNB] = EncodeVariantsInfo{ true, 2, 6, 0 }; + data[ZYDIS_MNEMONIC_JNBE] = EncodeVariantsInfo{ true, 2, 6, 0 }; + data[ZYDIS_MNEMONIC_JNL] = EncodeVariantsInfo{ true, 2, 6, 0 }; + data[ZYDIS_MNEMONIC_JNLE] = EncodeVariantsInfo{ true, 2, 6, 0 }; + data[ZYDIS_MNEMONIC_JNO] = EncodeVariantsInfo{ true, 2, 6, 0 }; + data[ZYDIS_MNEMONIC_JNP] = EncodeVariantsInfo{ true, 2, 6, 0 }; + data[ZYDIS_MNEMONIC_JNS] = EncodeVariantsInfo{ true, 2, 6, 0 }; + data[ZYDIS_MNEMONIC_JNZ] = EncodeVariantsInfo{ true, 2, 6, 0 }; + data[ZYDIS_MNEMONIC_JO] = EncodeVariantsInfo{ true, 2, 6, 0 }; + data[ZYDIS_MNEMONIC_JP] = EncodeVariantsInfo{ true, 2, 6, 0 }; + data[ZYDIS_MNEMONIC_JS] = EncodeVariantsInfo{ true, 2, 6, 0 }; + data[ZYDIS_MNEMONIC_JZ] = EncodeVariantsInfo{ true, 2, 6, 0 }; + data[ZYDIS_MNEMONIC_LOOP] = EncodeVariantsInfo{ true, 2, -1, 0 }; + data[ZYDIS_MNEMONIC_LOOPE] = EncodeVariantsInfo{ true, 2, -1, 0 }; + data[ZYDIS_MNEMONIC_LOOPNE] = EncodeVariantsInfo{ true, 2, -1, 0 }; + data[ZYDIS_MNEMONIC_CALL] = EncodeVariantsInfo{ true, -1, 5, 0 }; // NOLINTEND(cppcoreguidelines-avoid-magic-numbers, readability-magic-numbers) return data; @@ -234,7 +233,7 @@ namespace zasm } // Check if this operand is used as the control flow target. - if (state.operandIndex == 0 && encodeInfo.isControlFlow) + if (encodeInfo.isControlFlow && state.operandIndex == encodeInfo.cfOperandIndex) { const auto targetAddress = labelVA.has_value() ? *labelVA : immValue; @@ -289,7 +288,7 @@ namespace zasm // Check if this operand is used as the control flow target. const auto& encodeInfo = getEncodeVariantInfo(state.req.mnemonic); - if (state.operandIndex == 0 && encodeInfo.isControlFlow) + if (encodeInfo.isControlFlow && state.operandIndex == encodeInfo.cfOperandIndex) { const auto targetAddress = immValue; const auto [addrRel, branchType] = processRelAddress(encodeInfo, ctx, targetAddress); From bb4faf9327e24d5bd95aa78ffaeab4160156cc29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=CE=B6eh=20Matt?= <5415177+ZehMatt@users.noreply.github.com> Date: Tue, 18 Jun 2024 16:43:54 +0300 Subject: [PATCH 4/4] Disable mvex instruction encoding, nobody uses this --- zasm/include/zasm/x86/emitter.hpp | 8 -------- zasm/src/zasm/src/encoder/encoder.cpp | 9 +++++++++ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/zasm/include/zasm/x86/emitter.hpp b/zasm/include/zasm/x86/emitter.hpp index 6f1399a..1e12d51 100644 --- a/zasm/include/zasm/x86/emitter.hpp +++ b/zasm/include/zasm/x86/emitter.hpp @@ -2086,14 +2086,6 @@ namespace zasm::x86 { return emit(x86::Mnemonic::Jecxz, a); } - inline Error jknzd(const Mask& a, const Imm& b) - { - return emit(x86::Mnemonic::Jknzd, a, b); - } - inline Error jkzd(const Mask& a, const Imm& b) - { - return emit(x86::Mnemonic::Jkzd, a, b); - } inline Error jl(const Imm& a) { return emit(x86::Mnemonic::Jl, a); diff --git a/zasm/src/zasm/src/encoder/encoder.cpp b/zasm/src/zasm/src/encoder/encoder.cpp index 9e2c0b0..4f123ee 100644 --- a/zasm/src/zasm/src/encoder/encoder.cpp +++ b/zasm/src/zasm/src/encoder/encoder.cpp @@ -33,6 +33,13 @@ namespace zasm static constexpr std::int32_t kHintRequiresSize = -1; + static constexpr auto kAllowedEncodingX86 = static_cast( + ZYDIS_ENCODABLE_ENCODING_LEGACY | ZYDIS_ENCODABLE_ENCODING_3DNOW); + + static constexpr auto kAllowedEncodingX64 = static_cast( + ZYDIS_ENCODABLE_ENCODING_LEGACY | ZYDIS_ENCODABLE_ENCODING_3DNOW | ZYDIS_ENCODABLE_ENCODING_XOP + | ZYDIS_ENCODABLE_ENCODING_VEX | ZYDIS_ENCODABLE_ENCODING_EVEX); + struct EncodeVariantsInfo { bool isControlFlow{}; @@ -532,10 +539,12 @@ namespace zasm if (mode == MachineMode::AMD64) { req.machine_mode = ZYDIS_MACHINE_MODE_LONG_64; + req.allowed_encodings = kAllowedEncodingX64; } else if (mode == MachineMode::I386) { req.machine_mode = ZYDIS_MACHINE_MODE_LONG_COMPAT_32; + req.allowed_encodings = kAllowedEncodingX86; } req.mnemonic = static_cast(mnemonic.value()); req.prefixes = getAttribs(attribs);