diff --git a/examples/decode_to_assembler/main.cpp b/examples/decode_to_assembler/main.cpp index d731eb0..6358b11 100644 --- a/examples/decode_to_assembler/main.cpp +++ b/examples/decode_to_assembler/main.cpp @@ -7,7 +7,15 @@ int main() using namespace zasm; const uint64_t baseAddr = 0x00007FF6BC738ED4; - const std::array code = { + const std::array code = { + 0x40, 0x53, // push rbx + 0x45, 0x8B, 0x18, // mov r11d, dword ptr ds:[r8] + 0x48, 0x8B, 0xDA, // mov rbx, rdx + 0x41, 0x83, 0xE3, 0xF8, // and r11d, 0xFFFFFFF8 + 0x4C, 0x8B, 0xC9, // mov r9, rcx + 0x41, 0xF6, 0x00, 0x04, // test byte ptr ds:[r8], 0x4 + 0x4C, 0x8B, 0xD1, // mov r10, rcx + 0x74, 0x13, // je 0x00007FF6BC738EFF 0x40, 0x53, // push rbx 0x45, 0x8B, 0x18, // mov r11d, dword ptr ds:[r8] 0x48, 0x8B, 0xDA, // mov rbx, rdx @@ -37,15 +45,19 @@ int main() return EXIT_FAILURE; } - const auto& instrInfo = *decoderRes; - - const auto instr = instrInfo.getInstruction(); + const auto& instr = decoderRes.value(); if (auto res = assembler.emit(instr); res != zasm::ErrorCode::None) { std::cout << "Failed to emit instruction " << std::hex << curAddress << ", " << res.getErrorName() << "\n"; } - bytesDecoded += instrInfo.getLength(); + bytesDecoded += instr.getLength(); + + if (x86::isBranching(instr)) + { + // Decode only a single branch. + break; + } } Serializer serializer; diff --git a/zasm/include/zasm/base/instruction.hpp b/zasm/include/zasm/base/instruction.hpp index c081595..b117242 100644 --- a/zasm/include/zasm/base/instruction.hpp +++ b/zasm/include/zasm/base/instruction.hpp @@ -10,46 +10,60 @@ namespace zasm { - /// - /// Base type for Instruction and InstructionDetail, not to be used directly. 
- /// - template class InstructionBase + class Instruction; + class InstructionDetail; + + namespace detail + { + template struct IsInstructionType : std::false_type + { + }; + + template struct IsInstructionType> : std::true_type + { + }; + } // namespace detail + + class InstructionBase { public: using Length = std::uint8_t; using Mnemonic = InstrMnemonic; using Attribs = InstrAttribs; using Category = InstrCategory; - using OperandCount = std::uint8_t; - using Operands = std::array; + + enum class Type : uint8_t + { + Signanture, + Detail, + }; protected: Attribs _attribs{}; OperandCount _opCount{}; Mnemonic _mnemonic{}; - Operands _operands{}; + Type _type{}; protected: constexpr InstructionBase() = default; - constexpr InstructionBase(Attribs attribs, Mnemonic mnemonic, OperandCount opCount, const Operands& ops) noexcept - : _attribs{ attribs } + constexpr InstructionBase(Type type, Attribs attribs, Mnemonic mnemonic, OperandCount opCount) noexcept + : _type{ type } + , _attribs{ attribs } , _opCount{ opCount } , _mnemonic{ mnemonic } - , _operands{ ops } { } public: - constexpr bool operator==(const InstructionBase& other) const - { - return _attribs == other._attribs && _mnemonic == other._mnemonic && _opCount == other._opCount - && _operands == other._operands; - } - - constexpr bool operator!=(const InstructionBase& other) const + /// + /// Returns the instruction type of this object. The type can be detail or signature. This is only + /// relevant when casting between types. 
+ /// + /// Instruction type + constexpr Type getType() const noexcept { - return !(*this == other); + return _type; } /// @@ -60,6 +74,80 @@ namespace zasm return _mnemonic; } + /// + /// Sets a new mnemonic for this instruction. This must be one of the architecture-defined mnemonics, + /// e.g. x86::Mnemonic::Mov + /// + /// New mnemonic + /// Instruction& + template constexpr InstructionBase& setMnemonic(T mnemonic) + { + _mnemonic = static_cast(mnemonic); + return *this; + } + + /// + /// Casts this object to T. T must be a type that inherits InstructionBase and has kInstrType. + /// Casting to the wrong type is UB. + /// + /// New type + /// Reference as T + template T& as() + { + static_assert(detail::IsInstructionType::value, "T is not a supported instruction type."); + + assert(T::kInstrType == _type); + + return static_cast(*this); + } + + /// + /// Casts this object to T. T must be a type that inherits InstructionBase and has kInstrType. + /// Casting to the wrong type is UB. + /// + /// New type + /// Reference as T + template const T& as() const + { + static_assert(detail::IsInstructionType::value, "T is not a supported instruction type."); + + assert(T::kInstrType == _type); + + return static_cast(*this); + } + }; + + /// + /// Base type for Instruction and InstructionDetail, not to be used directly. 
+ /// + template class TInstructionBase : public InstructionBase + { + public: + using Operands = std::array; + + protected: + Operands _operands{}; + + protected: + constexpr TInstructionBase() = default; + constexpr TInstructionBase(Attribs attribs, Mnemonic mnemonic, OperandCount opCount, const Operands& ops) noexcept + : InstructionBase(TBase::kInstrType, attribs, mnemonic, opCount) + , _operands{ ops } + { + } + + public: + constexpr bool operator==(const TInstructionBase& other) const + { + return _attribs == other._attribs && _mnemonic == other._mnemonic && _opCount == other._opCount + && _operands == other._operands; + } + + constexpr bool operator!=(const TInstructionBase& other) const + { + return !(*this == other); + } + /// /// Sets a new mnemonic for this instructions, this must be one of the architecture defined mnemonic /// ex.: x86::Mnemonic::Mov diff --git a/zasm/include/zasm/core/packed.hpp b/zasm/include/zasm/core/packed.hpp index d5ea0fb..44f1d45 100644 --- a/zasm/include/zasm/core/packed.hpp +++ b/zasm/include/zasm/core/packed.hpp @@ -11,7 +11,7 @@ namespace zasm /// /// Container to tightly pack multiple elements with a given bit size to store /// in the provided underlying type. - /// Example: Specifying the underlying type as uint32_t and element bit size of 10 allows you to store 3 elements. + /// Example: Specifying the underlying type as uint32_t and element bit size of 10 allows storing 3 elements. /// /// The underlying storage type /// Value type of each element packed diff --git a/zasm/include/zasm/program/instruction.hpp b/zasm/include/zasm/program/instruction.hpp index 854edcb..ed861c2 100644 --- a/zasm/include/zasm/program/instruction.hpp +++ b/zasm/include/zasm/program/instruction.hpp @@ -17,31 +17,34 @@ namespace zasm class InstructionDetail; /// - /// Lightweight instruction object that represents the instruction signature rather than the full instruction. 
+ /// Lightweight instruction object that represents the instruction signature rather than the full instruction, + /// this is the type that is stored as a node in the Program to keep the memory footprint low. /// - class Instruction final : public InstructionBase + class Instruction final : public TInstructionBase { public: + static constexpr auto kInstrType = InstructionBase::Type::Signanture; + constexpr Instruction() noexcept = default; constexpr Instruction(Mnemonic mnemonic) noexcept - : InstructionBase({}, mnemonic, {}, {}) + : TInstructionBase({}, mnemonic, {}, {}) { } constexpr Instruction(Mnemonic mnemonic, OperandCount opCount, const Operands& operands) noexcept - : InstructionBase({}, mnemonic, opCount, operands) + : TInstructionBase({}, mnemonic, opCount, operands) { } constexpr Instruction(Attribs attribs, Mnemonic mnemonic, OperandCount opCount, const Operands& operands) noexcept - : InstructionBase(attribs, mnemonic, opCount, operands) + : TInstructionBase(attribs, mnemonic, opCount, operands) { } constexpr bool operator==(const Instruction& other) const { - return InstructionBase::operator==(other); + return TInstructionBase::operator==(other); } constexpr bool operator!=(const Instruction& other) const @@ -50,16 +53,20 @@ namespace zasm } /// - /// Returns InstructionInfo or zasm::Error for given mode and instruction. + /// Returns InstructionDetail or zasm::Error for given mode and instruction. + /// NOTE: The function is doing a bit of processing, so it should be only called when + /// all the instruction details are required. 
/// - Expected getDetail(MachineMode mode) const; + Expected getDetail(MachineMode mode) const; - static Expected getDetail(MachineMode mode, const Instruction& instr); + static Expected getDetail(MachineMode mode, const Instruction& instr); }; - class InstructionDetail final : public InstructionBase + class InstructionDetail final : public TInstructionBase { public: + static constexpr auto kInstrType = InstructionBase::Type::Detail; + using OperandsAccess = Packed; using OperandsVisibility = Packed; @@ -96,7 +103,7 @@ namespace zasm Attribs attribs, Mnemonic mnemonic, OperandCount opCount, const Operands& operands, const OperandsAccess& access, const OperandsVisibility& opsVisibility, const CPUFlags& flags, const Category& category, Length length = 0) noexcept - : InstructionBase{ attribs, mnemonic, opCount, operands } + : TInstructionBase{ attribs, mnemonic, opCount, operands } , _access{ access } , _opsVisibility{ opsVisibility } , _cpuFlags{ flags } @@ -107,7 +114,7 @@ namespace zasm constexpr bool operator==(const InstructionDetail& other) const { - return InstructionBase::operator==(other) && _access == other._access && _opsVisibility == other._opsVisibility + return TInstructionBase::operator==(other) && _access == other._access && _opsVisibility == other._opsVisibility && _cpuFlags == other._cpuFlags && _category == other._category && _length == other._length; } @@ -116,6 +123,10 @@ namespace zasm return !(*this == other); } + /// + /// Returns the instruction category, this is target architecture specific. + /// + /// Instruction Category constexpr Category getCategory() const noexcept { return _category; @@ -144,11 +155,20 @@ namespace zasm return opCount; } + /// + /// Returns a reference to the array that holds the visibility for each operand. 
+ /// + /// Reference to operands visibility constexpr const OperandsVisibility& getOperandsVisibility() const noexcept { return _opsVisibility; } + /// + /// Returns the operand visibility for the specified index. + /// + /// Operand Index + /// Operand Visibility constexpr Operand::Visibility getOperandVisibility(std::size_t index) const noexcept { if (index >= _opCount) @@ -158,21 +178,41 @@ namespace zasm return _opsVisibility.get(index); } + /// + /// Returns true if the operand specified by the index is hidden. + /// + /// Operand Index + /// bool constexpr bool isOperandHidden(std::size_t index) const noexcept { return getOperandVisibility(index) == Operand::Visibility::Hidden; } + /// + /// Returns true if the operand specified by the index is explicit. + /// + /// Operand Index + /// bool constexpr bool isOperandExplicit(std::size_t index) const noexcept { return getOperandVisibility(index) == Operand::Visibility::Explicit; } + /// + /// Returns true if the operand specified by the index is implicit. + /// + /// Operand Index + /// bool constexpr bool isOperandImplicit(std::size_t index) const noexcept { return getOperandVisibility(index) == Operand::Visibility::Implicit; } + /// + /// Returns the operand access mask specified by the index. + /// + /// Operand Index + /// Access mask constexpr Operand::Access getOperandAccess(std::size_t index) const noexcept { if (index >= _opCount) @@ -182,6 +222,56 @@ namespace zasm return _access.get(index); } + /// + /// Returns true if the access of the operand at the specified index has any bit of accessMask set. + /// + /// Operand Index + /// bool + constexpr bool hasOperandAccess(std::size_t index, Operand::Access accessMask) const noexcept + { + return (getOperandAccess(index) & accessMask) != Operand::Access::None; + } + + /// + /// Returns true if the operand is read by the instruction (may read). 
+ /// + /// Operand Index + /// bool + constexpr bool isOperandRead(std::size_t index) const noexcept + { + return hasOperandAccess(index, Operand::Access::MaskRead); + } + + /// + /// Returns true if the operand is conditionally read by the instruction (may read). + /// + /// Operand Index + /// bool + constexpr bool isOperandCondRead(std::size_t index) const noexcept + { + return hasOperandAccess(index, Operand::Access::CondRead); + } + + /// + /// Returns true if the operand is written by the instruction (may write). + /// + /// Operand Index + /// bool + constexpr bool isOperandWrite(std::size_t index) const noexcept + { + return hasOperandAccess(index, Operand::Access::MaskWrite); + } + + /// + /// Returns true if the operand is conditionally written by the instruction (may write). + /// + /// Operand Index + /// bool + constexpr bool isOperandCondWrite(std::size_t index) const noexcept + { + return hasOperandAccess(index, Operand::Access::CondWrite); + } + constexpr const OperandsAccess& getOperandsAccess() const noexcept { return _access; diff --git a/zasm/include/zasm/x86/assembler.hpp b/zasm/include/zasm/x86/assembler.hpp index 8d1c2de..2caab9b 100644 --- a/zasm/include/zasm/x86/assembler.hpp +++ b/zasm/include/zasm/x86/assembler.hpp @@ -87,7 +87,7 @@ namespace zasm::x86 /// /// Creates a new alignment node that will align the next node to the specified alignment when - /// serialized. + /// serialized. 
/// To align code the type should be Align::Type::Code otherwise Align::Type::Data /// Error align(Align::Type type, std::uint32_t align); @@ -118,6 +118,7 @@ namespace zasm::x86 Error emit(Instruction::Attribs attribs, Instruction::Mnemonic mnemonic, std::size_t numOps, const Operand* ops); Error emit(const Instruction& instr); + Error emit(const InstructionDetail& instr); private: void addAttrib(Instruction::Attribs attrib) noexcept diff --git a/zasm/include/zasm/x86/mnemonic.hpp b/zasm/include/zasm/x86/mnemonic.hpp index df1ca86..1e4178d 100644 --- a/zasm/include/zasm/x86/mnemonic.hpp +++ b/zasm/include/zasm/x86/mnemonic.hpp @@ -1777,4 +1777,119 @@ namespace zasm::x86 static constexpr detail::Mnemonic Xtest{}; } // namespace Mnemonic + constexpr bool isBranching(zasm::InstrMnemonic mnemonic) + { + switch (mnemonic) + { + case Mnemonic::Call: + case Mnemonic::Ret: + case Mnemonic::Jb: + case Mnemonic::Jbe: + case Mnemonic::Jcxz: + case Mnemonic::Jecxz: + case Mnemonic::Jknzd: + case Mnemonic::Jkzd: + case Mnemonic::Jl: + case Mnemonic::Jle: + case Mnemonic::Jmp: + case Mnemonic::Jnb: + case Mnemonic::Jnbe: + case Mnemonic::Jnl: + case Mnemonic::Jnle: + case Mnemonic::Jno: + case Mnemonic::Jnp: + case Mnemonic::Jns: + case Mnemonic::Jnz: + case Mnemonic::Jo: + case Mnemonic::Jp: + case Mnemonic::Jrcxz: + case Mnemonic::Js: + case Mnemonic::Jz: + return true; + } + + return false; + } + + constexpr bool isBranching(const InstructionBase& instr) + { + return isBranching(instr.getMnemonic()); + } + + constexpr bool isCondBranching(zasm::InstrMnemonic mnemonic) + { + switch (mnemonic) + { + case Mnemonic::Jb: + case Mnemonic::Jbe: + case Mnemonic::Jcxz: + case Mnemonic::Jecxz: + case Mnemonic::Jknzd: + case Mnemonic::Jkzd: + case Mnemonic::Jl: + case Mnemonic::Jle: + case Mnemonic::Jnb: + case Mnemonic::Jnbe: + case Mnemonic::Jnl: + case Mnemonic::Jnle: + case Mnemonic::Jno: + case Mnemonic::Jnp: + case Mnemonic::Jns: + case Mnemonic::Jnz: + case Mnemonic::Jo: + case 
Mnemonic::Jp: + case Mnemonic::Jrcxz: + case Mnemonic::Js: + case Mnemonic::Jz: + return true; + } + + return false; + } + + constexpr bool isCondBranching(const InstructionBase& instr) + { + return isCondBranching(instr.getMnemonic()); + } + + constexpr bool isSyscall(zasm::InstrMnemonic mnemonic) + { + return mnemonic == Mnemonic::Syscall; + } + + constexpr bool isSyscall(const InstructionBase& instr) + { + return isSyscall(instr.getMnemonic()); + } + + constexpr bool isCall(zasm::InstrMnemonic mnemonic) + { + return mnemonic == Mnemonic::Call; + } + + constexpr bool isCall(const InstructionBase& instr) + { + return isCall(instr.getMnemonic()); + } + + constexpr bool isRet(zasm::InstrMnemonic mnemonic) + { + return mnemonic == Mnemonic::Ret; + } + + constexpr bool isRet(const InstructionBase& instr) + { + return isRet(instr.getMnemonic()); + } + + constexpr bool isJmp(zasm::InstrMnemonic mnemonic) + { + return mnemonic == Mnemonic::Jmp; + } + + constexpr bool isJmp(const InstructionBase& instr) + { + return isJmp(instr.getMnemonic()); + } + } // namespace zasm::x86 diff --git a/zasm/src/zasm/src/program/instruction.cpp b/zasm/src/zasm/src/program/instruction.cpp index 1377a85..d6172bb 100644 --- a/zasm/src/zasm/src/program/instruction.cpp +++ b/zasm/src/zasm/src/program/instruction.cpp @@ -25,7 +25,7 @@ namespace zasm return nullptr; } - Expected Instruction::getDetail(MachineMode mode, const Instruction& instr) + Expected Instruction::getDetail(MachineMode mode, const Instruction& instr) { const auto& operands = instr.getOperands(); const auto opCount = instr.getOperandCount(); @@ -72,7 +72,7 @@ namespace zasm return decoded; } - Expected Instruction::getDetail(MachineMode mode) const + Expected Instruction::getDetail(MachineMode mode) const { return getDetail(mode, *this); } diff --git a/zasm/src/zasm/src/x86/x86.assembler.cpp b/zasm/src/zasm/src/x86/x86.assembler.cpp index 163da21..ca56494 100644 --- a/zasm/src/zasm/src/x86/x86.assembler.cpp +++ 
b/zasm/src/zasm/src/x86/x86.assembler.cpp @@ -145,6 +145,12 @@ namespace zasm::x86 return emit(instr.getAttribs(), instr.getMnemonic(), instr.getOperandCount(), ops.data()); } + Error Assembler::emit(const InstructionDetail& instr) + { + const auto& ops = instr.getOperands(); + return emit(instr.getAttribs(), instr.getMnemonic(), instr.getVisibleOperandCount(), ops.data()); + } + Error Assembler::embedLabel(Label label) { BitSize size = BitSize::_0;