ejschwartz's picture
chr
cdbe5e3
raw
history blame
5.95 kB
import editdistance
import frontmatter
from hexdump2 import hexdump
import gradio as gr
import json
import shlex
import subprocess
import tempfile
from dist import levenshtein_with_wildcard, print_match_summary
# Text passed as the Gradio interface description in run(). It is the
# ``content`` attribute of README.md as parsed by python-frontmatter
# (presumably the Markdown body without the front-matter header -- confirm
# against the library docs). Loaded at import time from a relative path, so
# it is resolved against the current working directory.
description = frontmatter.load("README.md").content
def trim(str, n):
    """Return *str* without its first *n* lines.

    The text is split with ``splitlines`` and the remaining lines are
    re-joined with a single newline character, so the result has no
    trailing newline and other line terminators are normalised.
    """
    all_lines = str.splitlines()
    kept_lines = all_lines[n:]
    return "\n".join(kept_lines)
def trim_objdump(str):
    """Strip the first seven lines from *str* using ``trim``.

    Callers in this file pass objdump's standard output; the seven lines
    are presumably objdump's leading banner -- confirm against real output.
    """
    leading_lines_to_drop = 7
    return trim(str, leading_lines_to_drop)
def disassemble_bytes(byte_data, architecture, options):
    """Disassemble raw machine code with ``objdump`` and return the listing.

    Args:
        byte_data: Raw bytes to disassemble; written to a scratch ``.bin`` file.
        architecture: Value passed to ``objdump -m``.
        options: Value passed to ``objdump -M``.

    Returns:
        objdump's standard output after ``trim_objdump`` has removed its
        leading lines. If objdump fails, its stdout (and hence the result)
        may be empty; stderr is captured but not returned.
    """
    # Rely on the default delete-on-close so the scratch file is removed when
    # the ``with`` block exits instead of accumulating one file per call.
    # objdump opens the file by name while it is still open here, which works
    # on POSIX (compile() already relies on the same behaviour for objcopy).
    with tempfile.NamedTemporaryFile(suffix=".bin") as temp_bin_file:
        temp_bin_file.write(byte_data)
        # Make sure the bytes are on disk before objdump reads the file.
        temp_bin_file.flush()

        disassembly = subprocess.run(
            [
                "objdump",
                "-D",
                "-b",
                "binary",
                "-m",
                architecture,
                "-M",
                options,
                temp_bin_file.name,
            ],
            capture_output=True,
            text=True,
        ).stdout

    return trim_objdump(disassembly)
def compile(compiler, flags, source):
# Create a temporary file for the C source code
with tempfile.NamedTemporaryFile(suffix=".c", delete=False) as temp_c_file:
temp_c_file.write(source.encode())
temp_c_file_name = temp_c_file.name
# Create a temporary file for the object file
with tempfile.NamedTemporaryFile(suffix=".o", delete=False) as temp_o_file:
temp_o_file_name = temp_o_file.name
# Compile the C file to an object file
result = subprocess.run(
[compiler, "-c", temp_c_file_name]
+ shlex.split(flags)
+ ["-o", temp_o_file_name],
capture_output=True,
text=True,
)
compile_output = result.stdout + result.stderr
# Create a temporary file for the raw bytes
with tempfile.NamedTemporaryFile(suffix=".raw", delete=True) as raw_bytes_file:
subprocess.run(
[
"objcopy",
"--only-section",
".text",
# XXX in reality we should probably look at the sections
"--only-section",
".text.*",
"-O",
"binary",
temp_o_file_name,
raw_bytes_file.name,
]
)
compiled_bytes = raw_bytes_file.read()
# Disassemble the object file
disassembly = subprocess.run(
["objdump", "-dr", temp_o_file_name],
capture_output=True,
text=True
).stdout
disassembly = trim_objdump(disassembly)
# Relocs
# relocs = subprocess.run(
# ["objdump", "-r", temp_o_file_name],
# capture_output=True,
# text=True
# ).stdout
# relocs = trim(relocs, 3)
json_relocs = subprocess.run(
["llvm-readobj-19", "--elf-output-style=JSON", "--relocations", temp_o_file_name],
capture_output=True,
text=True,
).stdout
json_relocs = json.loads(json_relocs)
json_relocs = json_relocs[0]["Relocations"]
json_relocs = [r["Relocation"] for d in json_relocs for r in d['Relocs']]
# Filter out .text
json_relocs = [r for r in json_relocs if r["Symbol"]["Name"] != ".text"]
def reloc_type2size(s):
match s:
case "R_X86_64_PC32":
return 4
case "R_X86_64_PLT32":
return 4
case _:
assert False, f"Unknown reloc {s}"
relocs_byte_range = [range(r["Offset"], r["Offset"] + reloc_type2size(r["Type"]["Name"])) for r in json_relocs]
# Flatten relocs_byte_range
relocs_byte_range = [chr(i) for r in relocs_byte_range for i in r]
print(f"relocs: {relocs_byte_range}")
print(print_match_summary(b"test", compiled_bytes, wildcard_offsets_str2=relocs_byte_range))
if result.returncode == 0:
return json_relocs, compiled_bytes, compile_output, disassembly
else:
return None, None, compile_output, disassembly
def predict(target_bytes, source, compiler, flags, disasm_arch, disasm_options):
    """Compile *source*, disassemble the target bytes and compare the two.

    Args:
        target_bytes: Target function as a hex string (parsed with
            ``bytes.fromhex``).
        source: Source code handed to ``compile``.
        compiler: Compiler executable handed to ``compile``.
        flags: Compiler flag string handed to ``compile``.
        disasm_arch: Architecture handed to ``disassemble_bytes``.
        disasm_options: Disassembler options handed to ``disassemble_bytes``.

    Returns:
        A 7-tuple: hexdump of the compiled bytes (or the text
        ``"Compilation failed"``), hexdump of the target bytes, the edit
        distance between the two byte strings (or ``-1`` when compilation
        failed), the compiler output, the compiled disassembly, the
        compiled relocations and the target disassembly.
    """
    target = bytes.fromhex(target_bytes)
    relocs, built, compiler_log, built_asm = compile(compiler, flags, source)
    target_asm = disassemble_bytes(target, disasm_arch, disasm_options)

    if built is None:
        # compile() signals failure by returning None for the bytes.
        built_dump = "Compilation failed"
        target_dump = hexdump(target, result="return")
        distance = -1
    else:
        built_dump = hexdump(built, result="return")
        target_dump = hexdump(target, result="return")
        distance = editdistance.eval(built, target)

    return (
        built_dump,
        target_dump,
        distance,
        compiler_log,
        built_asm,
        relocs,
        target_asm,
    )
def run():
    """Build the Gradio interface around ``predict`` and start serving it.

    The input widgets map, in order, onto the parameters of ``predict``;
    the output widgets map, in order, onto the tuple it returns. The server
    listens on 0.0.0.0:7860 with error display enabled.
    """
    input_widgets = [
        gr.Textbox(
            lines=10,
            label="Bytes of Target Function (in hex)",
            value="b8 2a 00 00 00 c3",
        ),
        gr.Textbox(
            lines=10,
            label="Decompiled C Source Code",
            value="int x;\nint foo() { return x; }",
        ),
        gr.Textbox(label="Compiler", value="g++"),
        gr.Textbox(label="Compiler Flags", value="-O2"),
        gr.Textbox(label="Architecture (objdump -m)", value="i386"),
        gr.Textbox(label="Disassembler options (objdump -M)", value="x86-64"),
    ]

    output_widgets = [
        gr.Textbox(label="Compiled bytes"),
        gr.Textbox(label="Target bytes"),
        gr.Number(label="Edit distance (lower is better)"),
        gr.Textbox(label="Compiler Output"),
        gr.Textbox(label="Compiled Disassembly"),
        gr.JSON(label="Compiled relocations", open=True),
        gr.Textbox(label="Target Disassembly"),
    ]

    app = gr.Interface(
        fn=predict,
        description=description,
        inputs=input_widgets,
        outputs=output_widgets,
    )
    app.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
# Start the web UI as soon as this module is executed; there is no
# ``__main__`` guard, so importing the module launches the server as well.
run()