iced-x86 disassembler Python bindings

iced-x86 is a blazing fast and correct x86 (16/32/64-bit) disassembler for Python.
- 👍 Supports all Intel and AMD instructions
- 👍 Correct: All instructions are tested and iced has been tested against other disassemblers/assemblers (xed, gas, objdump, masm, dumpbin, nasm, ndisasm) and fuzzed
- 👍 The formatter supports masm, nasm, gas (AT&T), Intel (XED) and there are many options to customize the output
- 👍 The encoder can be used to re-encode decoded instructions at any address
- 👍 API to get instruction info, eg. read/written registers, memory and rflags bits; CPUID feature flag, control flow info, etc
- 👍 Rust + Python
- 👍 License: MIT
Rust crate: https://github.com/icedland/iced/blob/master/src/rust/iced-x86/README.md
Installing iced-x86
It's on PyPI with built wheels for Windows, macOS and Linux so this command should work (use python or py if on Windows):
python3 -m pip install -U iced-x86
If pip tries to build it from source and fails, see below for all required build tools (eg. python3 -m pip install setuptools wheel setuptools-rust and Rust https://www.rust-lang.org/tools/install).
Building the code
If on Windows, replace python3 in all commands with python or py.
Prerequisites:
python3 setup.py bdist_wheel
python3 -m pip install iced-x86 --no-index -f dist --only-binary iced-x86
python3 -m pip uninstall iced-x86
Prerequisites (tests/docs):
python3 -m pip install -r requirements-dev.txt
Tests:
python3 setup.py bdist_wheel
python3 -m pip install iced-x86 --no-index -f dist --only-binary iced-x86
python3 -m pytest
python3 -m pip uninstall -y iced-x86
Docs:
python3 setup.py bdist_wheel
python3 -m sphinx --color -n -W --keep-going -b html docs docs/_build
python3 -m sphinx --color -n -W --keep-going -b doctest docs docs/_build
How-tos
Disassemble (decode and format instructions)
This example uses a Decoder and one of the Formatters to decode and format the code.
The last part shows how to use format specifiers to format instructions.
from iced_x86 import *
# Inputs shared by this example: decoder bitness, the address of the first
# code byte, and the raw machine code to decode.
EXAMPLE_CODE_BITNESS = 64
EXAMPLE_CODE_RIP = 0x0000_7FFA_C46A_CDA4
EXAMPLE_CODE = (
    b"\x48\x89\x5C\x24\x10\x48\x89\x74\x24\x18\x55\x57\x41\x56\x48\x8D"
    b"\xAC\x24\x00\xFF\xFF\xFF\x48\x81\xEC\x00\x02\x00\x00\x48\x8B\x05"
    b"\x18\x57\x0A\x00\x48\x33\xC4\x48\x89\x85\xF0\x00\x00\x00\x4C\x8B"
    b"\x05\x2F\x24\x0A\x00\x48\x8D\x05\x78\x7C\x04\x00\x33\xFF"
)
# Decode EXAMPLE_CODE and print one line per instruction:
# address, raw instruction bytes (hex) and the formatted text.
decoder = Decoder(EXAMPLE_CODE_BITNESS, EXAMPLE_CODE, ip=EXAMPLE_CODE_RIP)
formatter = Formatter(FormatterSyntax.NASM)

# Formatter options used by this example.
formatter.digit_separator = "`"
formatter.first_operand_char_index = 10

for instr in decoder:
    disasm = formatter.format(instr)
    # instr.ip is an address; subtracting the base address gives the
    # instruction's offset inside EXAMPLE_CODE.
    start_index = instr.ip - EXAMPLE_CODE_RIP
    bytes_str = EXAMPLE_CODE[start_index:start_index + instr.len].hex().upper()
    print(f"{instr.ip:016X} {bytes_str:20} {disasm}")

# Instructions can also be formatted directly with format specifiers.
decoder = Decoder(64, b"\x86\x64\x32\x16", ip=0x1234_5678)
instr = decoder.decode()

print()
print("Format specifiers example:")
print(f"{instr:f}")
print(f"{instr:g}")
print(f"{instr:i}")
print(f"{instr:m}")
print(f"{instr:n}")
print(f"{instr:gG_xSs}")
Adding type annotations
For performance reasons, real Python enums are not used. They're just too slow. Instead, all enums are
currently modules with constants in them. However, this causes problems with type checkers such as
mypy, since they see plain integers instead of, e.g., a Register.
If you add type annotations to methods or variables, the enum name to use is the enum name with an
appended _, eg. if the enum is Register (which is a module), use Register_ as the type name.
You don't need to do this with classes, eg. Instruction, since they're not enums.
from iced_x86 import *
# Variables holding an enum value are annotated with the `<Enum>_` type name.
BASE_REG: Register_ = Register.RAX


def my_fun(code: Code_, reg: Register_, reg2: Register_) -> Register_:
    """Return ``reg2`` unchanged; exists only to show enum type annotations."""
    return reg2


my_fun(Code.RDTSC, BASE_REG, Register.ECX)
Create and encode instructions
This example uses a BlockEncoder to encode created Instructions.
from iced_x86 import *
bitness = 64

# Next ID handed out by create_label(); the first ID is 1.
label_id: int = 1


def create_label() -> int:
    """Return a fresh label ID and advance the module-level counter."""
    global label_id
    new_id = label_id
    label_id += 1
    return new_id
def add_label(id: int, instruction: Instruction) -> Instruction:
    """Tag ``instruction`` with a label by storing ``id`` in its ``ip``.

    The same instruction object is returned so the call can be nested
    inside another expression.
    """
    instruction.ip = id
    return instruction
# Labels are plain integer IDs; add_label() attaches one to an instruction
# and branch / RIP-relative operands refer to it by the same ID.
label1 = create_label()
data1 = create_label()
raw_data = b"\x12\x34\x56\x78"

instructions = [
    Instruction.create_reg(Code.PUSH_R64, Register.RBP),
    Instruction.create_reg(Code.PUSH_R64, Register.RDI),
    Instruction.create_reg(Code.PUSH_R64, Register.RSI),
    Instruction.create_reg_u32(Code.SUB_RM64_IMM32, Register.RSP, 0x50),
    Instruction.create(Code.VEX_VZEROUPPER),
    Instruction.create_reg_mem(
        Code.LEA_R64_M, Register.RBP, MemoryOperand(Register.RSP, displ=0x60)),
    Instruction.create_reg_reg(Code.MOV_R64_RM64, Register.RSI, Register.RCX),
    Instruction.create_reg_mem(
        Code.LEA_R64_M, Register.RDI, MemoryOperand(Register.RBP, displ=-0x38)),
    Instruction.create_reg_i32(Code.MOV_R32_IMM32, Register.ECX, 0x0A),
    Instruction.create_reg_reg(Code.XOR_R32_RM32, Register.EAX, Register.EAX),
    Instruction.create_rep_stosd(bitness),
    Instruction.create_reg_u64(Code.CMP_RM64_IMM32, Register.RSI, 0x1234_5678),
    # Branch whose target is the instruction tagged with label1 below.
    Instruction.create_branch(Code.JNE_REL32_64, label1),
    Instruction.create(Code.NOPD),
    add_label(label1, Instruction.create_reg_reg(
        Code.XOR_R32_RM32, Register.R15D, Register.R15D)),
    # RIP-relative operand whose displacement is the data1 label ID.
    Instruction.create_reg_mem(
        Code.LEA_R64_M, Register.R14, MemoryOperand(Register.RIP, displ=data1)),
    Instruction.create(Code.NOPD),
    # The raw data is appended after the code and tagged with data1.
    add_label(data1, Instruction.create_declare_byte(raw_data)),
]

# Encode all instructions as one block located at target_rip.
target_rip = 0x0000_1248_FC84_0000
encoder = BlockEncoder(bitness)
encoder.add_many(instructions)
encoded_bytes = encoder.encode(target_rip)

# The last len(raw_data) bytes are the declared data; everything before is code.
code_len = len(encoded_bytes) - len(raw_data)
bytes_code = encoded_bytes[0:code_len]
bytes_data = encoded_bytes[code_len:]

# Decode only the code bytes and print them in GAS syntax.
decoder = Decoder(bitness, bytes_code, ip=target_rip)
formatter = Formatter(FormatterSyntax.GAS)
formatter.first_operand_char_index = 8
for instruction in decoder:
    disasm = formatter.format(instruction)
    print(f"{instruction.ip:016X} {disasm}")

# Print the trailing data as a declare-byte line at the decoder's current ip.
db = Instruction.create_declare_byte(bytes_data)
print(f"{decoder.ip:016X} {formatter.format(db)}")
Move code in memory (eg. hook a function)
Uses the instruction info API and the encoder to patch a function so that it jumps to the programmer's function.
from iced_x86 import *
def disassemble(data: bytes, ip: int) -> None:
    """Print every instruction decoded from ``data`` in NASM syntax.

    Args:
        data: Machine code to decode (decoded with ``EXAMPLE_CODE_BITNESS``).
        ip: Address assigned to the first byte of ``data``.

    One line is printed per instruction (address, then text), followed by
    a single blank line.
    """
    formatter = Formatter(FormatterSyntax.NASM)
    decoder = Decoder(EXAMPLE_CODE_BITNESS, data, ip=ip)
    for instruction in decoder:
        disasm = formatter.format(instruction)
        print(f"{instruction.ip:016X} {disasm}")
    print()
def how_to_move_code() -> None:
    """Relocate the start of EXAMPLE_CODE and patch the original with a jump.

    Steps:
      1. Decode whole instructions until at least ``required_bytes`` bytes
         are covered.
      2. Re-encode those instructions (plus a jump back to the rest of the
         original code, if needed) at a new base address.
      3. Overwrite the first 12 bytes of a copy of the original code with
         ``mov rax, imm64`` / ``jmp rax`` targeting ``YOUR_FUNC``.
      4. Print the original, the patched and the moved code.

    Raises:
        ValueError: If an invalid instruction is decoded, if a decoded
            instruction is not plain fall-through code, or if there are not
            enough bytes to hold the patch.
    """
    print("Original code:")
    disassemble(EXAMPLE_CODE, EXAMPLE_CODE_RIP)

    decoder = Decoder(EXAMPLE_CODE_BITNESS, EXAMPLE_CODE, ip=EXAMPLE_CODE_RIP)

    # Size of the patch written below: 10 bytes (mov rax, imm64) + 2 bytes (jmp rax).
    required_bytes = 10 + 2
    total_bytes = 0
    orig_instructions = []
    for instr in decoder:
        orig_instructions.append(instr)
        total_bytes += instr.len
        if not instr:
            raise ValueError("Found garbage")
        if total_bytes >= required_bytes:
            break

        cflow = instr.flow_control
        if cflow == FlowControl.NEXT:
            pass
        elif cflow == FlowControl.UNCONDITIONAL_BRANCH:
            # NOTE(review): the raise below is placed at the `elif` level, so
            # every unconditional branch is rejected; confirm this nesting.
            if instr.op0_kind == OpKind.NEAR_BRANCH64:
                # Read only to show where the target is; it is not followed.
                _target = instr.near_branch_target
            raise ValueError("Not supported by this simple example")
        else:
            raise ValueError("Not supported by this simple example")

    if total_bytes < required_bytes:
        raise ValueError("Not enough bytes!")
    if not orig_instructions:
        raise ValueError("Should not be empty here")

    # Unless the moved code ends with a return, append a jump to the first
    # original instruction that was not moved.
    last_instr = orig_instructions[-1]
    if last_instr.flow_control != FlowControl.RETURN:
        orig_instructions.append(
            Instruction.create_branch(Code.JMP_REL32_64, last_instr.next_ip))

    # Re-encode the moved instructions at their new location.
    relocated_base_address = EXAMPLE_CODE_RIP + 0x20_0000
    encoder = BlockEncoder(decoder.bitness)
    encoder.add_many(orig_instructions)
    new_code = encoder.encode(relocated_base_address)

    # Patch a mutable copy of the original code with
    # `mov rax, YOUR_FUNC` (48 B8 imm64) followed by `jmp rax` (FF E0).
    YOUR_FUNC: int = 0x1234_5678_9ABC_DEF0
    example_code = bytearray(EXAMPLE_CODE)
    example_code[0] = 0x48
    example_code[1] = 0xB8
    # The 64-bit immediate is stored little-endian in bytes 2..9.
    example_code[2:10] = YOUR_FUNC.to_bytes(8, "little")
    example_code[10] = 0xFF
    example_code[11] = 0xE0

    print("Original + patched code:")
    disassemble(example_code, EXAMPLE_CODE_RIP)

    print("Moved code:")
    disassemble(new_code, relocated_base_address)
# Inputs of the move-code example: decoder bitness, address of the first
# code byte, and the machine code that gets patched and relocated.
EXAMPLE_CODE_BITNESS: int = 64
EXAMPLE_CODE_RIP: int = 0x0000_7FFA_C46A_CDA4
EXAMPLE_CODE: bytes = (
    b"\x48\x89\x5C\x24\x10\x48\x89\x74\x24\x18\x55\x57\x41\x56\x48\x8D"
    b"\xAC\x24\x00\xFF\xFF\xFF\x48\x81\xEC\x00\x02\x00\x00\x48\x8B\x05"
    b"\x18\x57\x0A\x00\x48\x33\xC4\x48\x89\x85\xF0\x00\x00\x00\x4C\x8B"
    b"\x05\x2F\x24\x0A\x00\x48\x8D\x05\x78\x7C\x04\x00\x33\xFF"
)
# Run the example.
how_to_move_code()
Get instruction info, eg. read/written regs/mem, control flow info, etc
Shows how to get used registers/memory and other info. It uses Instruction methods
and an InstructionInfoFactory to get this info.
from iced_x86 import *
from typing import Dict, Sequence
from types import ModuleType
def how_to_get_instruction_info() -> None:
    """Decode EXAMPLE_CODE and print detailed information per instruction.

    For every instruction this prints its address and text, then opcode
    and encoding details, FPU TOP changes, displacement/immediate offsets,
    stack pointer increment, condition code, the RFLAGS bit groups, the
    memory size of the first memory operand, per-operand access and kind,
    and the registers and memory locations it uses.
    """
    decoder = Decoder(EXAMPLE_CODE_BITNESS, EXAMPLE_CODE, ip=EXAMPLE_CODE_RIP)

    # The factory supplies the register/memory usage info printed last;
    # everything else comes from Instruction / OpCodeInfo members.
    info_factory = InstructionInfoFactory()
    for instr in decoder:
        # Offsets and sizes of the displacement and immediates inside the
        # encoded instruction.
        offsets = decoder.get_constant_offsets(instr)

        print(f"{instr.ip:016X} {instr}")

        op_code = instr.op_code()
        info = info_factory.info(instr)
        fpu_info = instr.fpu_stack_increment_info()
        print(f" OpCode: {op_code.op_code_string}")
        print(f" Instruction: {op_code.instruction_string}")
        print(f" Encoding: {encoding_kind_to_string(instr.encoding)}")
        print(f" Mnemonic: {mnemonic_to_string(instr.mnemonic)}")
        print(f" Code: {code_to_string(instr.code)}")
        print(f" CpuidFeature: {cpuid_features_to_string(instr.cpuid_features())}")
        print(f" FlowControl: {flow_control_to_string(instr.flow_control)}")

        if fpu_info.writes_top:
            if fpu_info.increment == 0:
                print(" FPU TOP: the instruction overwrites TOP")
            else:
                print(f" FPU TOP inc: {fpu_info.increment}")
            cond_write = "True" if fpu_info.conditional else "False"
            print(f" FPU TOP cond write: {cond_write}")

        if offsets.has_displacement:
            print(f" Displacement offset = {offsets.displacement_offset}, size = {offsets.displacement_size}")
        if offsets.has_immediate:
            print(f" Immediate offset = {offsets.immediate_offset}, size = {offsets.immediate_size}")
        if offsets.has_immediate2:
            print(f" Immediate #2 offset = {offsets.immediate_offset2}, size = {offsets.immediate_size2}")

        if instr.is_stack_instruction:
            print(f" SP Increment: {instr.stack_pointer_increment}")
        if instr.condition_code != ConditionCode.NONE:
            print(f" Condition code: {condition_code_to_string(instr.condition_code)}")

        # One line per non-empty RFLAGS group, in this fixed order.
        rflags_groups = (
            ("Read", instr.rflags_read),
            ("Written", instr.rflags_written),
            ("Cleared", instr.rflags_cleared),
            ("Set", instr.rflags_set),
            ("Undefined", instr.rflags_undefined),
            ("Modified", instr.rflags_modified),
        )
        for label, bits in rflags_groups:
            if bits != RflagsBits.NONE:
                print(f" RFLAGS {label}: {rflags_bits_to_string(bits)}")

        # Only the first memory operand is inspected; its size is printed
        # when it is non-zero.
        for i in range(instr.op_count):
            op_kind = instr.op_kind(i)
            if op_kind == OpKind.MEMORY:
                size = MemorySizeExt.size(instr.memory_size)
                if size != 0:
                    print(f" Memory size: {size}")
                break

        for i in range(instr.op_count):
            print(f" Op{i}Access: {op_access_to_string(info.op_access(i))}")
        for i in range(op_code.op_count):
            print(f" Op{i}: {op_code_operand_kind_to_string(op_code.op_kind(i))}")
        for reg_info in info.used_registers():
            print(f" Used reg: {used_reg_to_string(reg_info)}")
        for mem_info in info.used_memory():
            print(f" Used mem: {used_mem_to_string(mem_info)}")
def rflags_bits_to_string(rf: int) -> str:
    """Return the names of the ``RflagsBits`` flags set in ``rf``.

    Names are joined with ``", "`` in a fixed order (OF, SF, ZF, AF, CF,
    PF, DF, IF, AC, UIF). ``"<empty>"`` is returned when none of these
    bits is set.
    """
    known_flags = (
        (RflagsBits.OF, "OF"),
        (RflagsBits.SF, "SF"),
        (RflagsBits.ZF, "ZF"),
        (RflagsBits.AF, "AF"),
        (RflagsBits.CF, "CF"),
        (RflagsBits.PF, "PF"),
        (RflagsBits.DF, "DF"),
        (RflagsBits.IF, "IF"),
        (RflagsBits.AC, "AC"),
        (RflagsBits.UIF, "UIF"),
    )
    names = [name for bit, name in known_flags if (rf & bit) != 0]
    if not names:
        return "<empty>"
    return ", ".join(names)
# Inputs of the instruction-info example: decoder bitness, address of the
# first code byte, and the machine code (written as hex, 16 bytes per row).
EXAMPLE_CODE_BITNESS: int = 64
EXAMPLE_CODE_RIP: int = 0x0000_7FFA_C46A_CDA4
EXAMPLE_CODE: bytes = bytes.fromhex(
    "48 89 5C 24 10 48 89 74 24 18 55 57 41 56 48 8D "
    "AC 24 00 FF FF FF 48 81 EC 00 02 00 00 48 8B 05 "
    "18 57 0A 00 48 33 C4 48 89 85 F0 00 00 00 4C 8B "
    "05 2F 24 0A 00 48 8D 05 78 7C 04 00 33 FF"
)
def create_enum_dict(module: ModuleType) -> Dict[int, str]:
    """Build a value-to-name map from the integer attributes of ``module``.

    Every attribute of ``module`` whose value is an ``int`` becomes an
    entry ``value -> attribute name``; all other attributes are skipped.
    If several names share one value, the name defined last wins.
    """
    return {
        value: name
        for name, value in vars(module).items()
        if isinstance(value, int)
    }
# Maps each integer constant of the Register module to its name.
REGISTER_TO_STRING: Dict[Register_, str] = create_enum_dict(Register)


def register_to_string(value: Register_) -> str:
    """Return the name of a ``Register`` value.

    Values missing from ``REGISTER_TO_STRING`` are rendered as
    ``str(value)`` followed by `` /*Register enum*/``.
    """
    name = REGISTER_TO_STRING.get(value)
    return name if name is not None else f"{value} /*Register enum*/"
# Maps each integer constant of the OpAccess module to its name.
OP_ACCESS_TO_STRING: Dict[OpAccess_, str] = create_enum_dict(OpAccess)


def op_access_to_string(value: OpAccess_) -> str:
    """Return the name of an ``OpAccess`` value.

    Values missing from ``OP_ACCESS_TO_STRING`` are rendered as
    ``str(value)`` followed by `` /*OpAccess enum*/``.
    """
    name = OP_ACCESS_TO_STRING.get(value)
    return name if name is not None else f"{value} /*OpAccess enum*/"
# Maps each integer constant of the EncodingKind module to its name.
ENCODING_KIND_TO_STRING: Dict[EncodingKind_, str] = create_enum_dict(EncodingKind)


def encoding_kind_to_string(value: EncodingKind_) -> str:
    """Return the name of an ``EncodingKind`` value.

    Values missing from ``ENCODING_KIND_TO_STRING`` are rendered as
    ``str(value)`` followed by `` /*EncodingKind enum*/``.
    """
    name = ENCODING_KIND_TO_STRING.get(value)
    return name if name is not None else f"{value} /*EncodingKind enum*/"
# Maps each integer constant of the Mnemonic module to its name.
MNEMONIC_TO_STRING: Dict[Mnemonic_, str] = create_enum_dict(Mnemonic)


def mnemonic_to_string(value: Mnemonic_) -> str:
    """Return the name of a ``Mnemonic`` value.

    Values missing from ``MNEMONIC_TO_STRING`` are rendered as
    ``str(value)`` followed by `` /*Mnemonic enum*/``.
    """
    name = MNEMONIC_TO_STRING.get(value)
    return name if name is not None else f"{value} /*Mnemonic enum*/"
# Maps each integer constant of the Code module to its name.
CODE_TO_STRING: Dict[Code_, str] = create_enum_dict(Code)


def code_to_string(value: Code_) -> str:
    """Return the name of a ``Code`` value.

    Values missing from ``CODE_TO_STRING`` are rendered as ``str(value)``
    followed by `` /*Code enum*/``.
    """
    name = CODE_TO_STRING.get(value)
    return name if name is not None else f"{value} /*Code enum*/"
# Maps each integer constant of the FlowControl module to its name.
FLOW_CONTROL_TO_STRING: Dict[FlowControl_, str] = create_enum_dict(FlowControl)


def flow_control_to_string(value: FlowControl_) -> str:
    """Return the name of a ``FlowControl`` value.

    Values missing from ``FLOW_CONTROL_TO_STRING`` are rendered as
    ``str(value)`` followed by `` /*FlowControl enum*/``.
    """
    name = FLOW_CONTROL_TO_STRING.get(value)
    return name if name is not None else f"{value} /*FlowControl enum*/"
# Maps each integer constant of the OpCodeOperandKind module to its name.
OP_CODE_OPERAND_KIND_TO_STRING: Dict[OpCodeOperandKind_, str] = create_enum_dict(OpCodeOperandKind)


def op_code_operand_kind_to_string(value: OpCodeOperandKind_) -> str:
    """Return the name of an ``OpCodeOperandKind`` value.

    Values missing from ``OP_CODE_OPERAND_KIND_TO_STRING`` are rendered as
    ``str(value)`` followed by `` /*OpCodeOperandKind enum*/``.
    """
    name = OP_CODE_OPERAND_KIND_TO_STRING.get(value)
    return name if name is not None else f"{value} /*OpCodeOperandKind enum*/"
# Maps each integer constant of the CpuidFeature module to its name.
CPUID_FEATURE_TO_STRING: Dict[CpuidFeature_, str] = create_enum_dict(CpuidFeature)


def cpuid_feature_to_string(value: CpuidFeature_) -> str:
    """Return the name of a ``CpuidFeature`` value.

    Values missing from ``CPUID_FEATURE_TO_STRING`` are rendered as
    ``str(value)`` followed by `` /*CpuidFeature enum*/``.
    """
    name = CPUID_FEATURE_TO_STRING.get(value)
    return name if name is not None else f"{value} /*CpuidFeature enum*/"
def cpuid_features_to_string(cpuid_features: Sequence[int]) -> str:
    """Return the names of all ``cpuid_features`` joined with ``" and "``.

    An empty sequence yields an empty string.
    """
    return " and ".join(cpuid_feature_to_string(f) for f in cpuid_features)
# Maps each integer constant of the MemorySize module to its name.
MEMORY_SIZE_TO_STRING: Dict[MemorySize_, str] = create_enum_dict(MemorySize)


def memory_size_to_string(value: MemorySize_) -> str:
    """Return the name of a ``MemorySize`` value.

    Values missing from ``MEMORY_SIZE_TO_STRING`` are rendered as
    ``str(value)`` followed by `` /*MemorySize enum*/``.
    """
    name = MEMORY_SIZE_TO_STRING.get(value)
    return name if name is not None else f"{value} /*MemorySize enum*/"
# Maps each integer constant of the ConditionCode module to its name.
CONDITION_CODE_TO_STRING: Dict[ConditionCode_, str] = create_enum_dict(ConditionCode)


def condition_code_to_string(value: ConditionCode_) -> str:
    """Return the name of a ``ConditionCode`` value.

    Values missing from ``CONDITION_CODE_TO_STRING`` are rendered as
    ``str(value)`` followed by `` /*ConditionCode enum*/``.
    """
    name = CONDITION_CODE_TO_STRING.get(value)
    return name if name is not None else f"{value} /*ConditionCode enum*/"
def used_reg_to_string(reg_info: UsedRegister) -> str:
    """Format a used register as ``<register name>:<access name>``."""
    register = register_to_string(reg_info.register)
    access = op_access_to_string(reg_info.access)
    return f"{register}:{access}"
def used_mem_to_string(mem_info: UsedMemory) -> str:
    """Format a used memory location as ``[seg:address;size;access]``.

    The address is built from up to three parts joined with ``+``: the
    base register, the index register (with ``*scale`` when the scale is
    not 1) and the displacement in upper-case hex. The displacement is
    left out when it is zero, unless there is no base or index register.
    """
    terms = []
    if mem_info.base != Register.NONE:
        terms.append(register_to_string(mem_info.base))
    if mem_info.index != Register.NONE:
        index_term = register_to_string(mem_info.index)
        if mem_info.scale != 1:
            index_term += "*" + str(mem_info.scale)
        terms.append(index_term)
    # Always show a displacement when there is nothing else to show.
    if mem_info.displacement != 0 or not terms:
        terms.append(f"0x{mem_info.displacement:X}")

    segment = register_to_string(mem_info.segment)
    address = "+".join(terms)
    size = memory_size_to_string(mem_info.memory_size)
    access = op_access_to_string(mem_info.access)
    return f"[{segment}:{address};{size};{access}]"
# Run the example.
how_to_get_instruction_info()
Disassemble old/deprecated CPU instructions
from iced_x86 import *
# Machine code decoded by this example; each row is one chunk of the
# original byte literal.
TEST_CODE = (
    b"\x66\x0F\x1A\x08"
    b"\x0F\x26\xDE"
    b"\x0F\x36\x00"
    b"\x0F\x39"
    b"\x0F\x78\x08"
    b"\x0F\x3D"
    b"\x0F\x58\x08"
    b"\xDF\xFC"
    b"\x0F\x3F"
)
# Decoder option flags combined for this example.
# NOTE(review): presumably each flag enables decoding of one of the
# old/deprecated instruction groups - confirm against the DecoderOptions docs.
DECODER_OPTIONS = (
    DecoderOptions.MPX
    | DecoderOptions.MOV_TR
    | DecoderOptions.CYRIX
    | DecoderOptions.CYRIX_DMI
    | DecoderOptions.ALTINST
)

# Decode TEST_CODE as 32-bit code and print address + text for each instruction.
decoder = Decoder(32, TEST_CODE, DECODER_OPTIONS, ip=0x731E_0A03)
for instr in decoder:
    print(f"{instr.ip:08X} {instr:ns}")