# Page header captured along with the file (not program code):
# ejschwartz's picture
# open relocs by default
# a74a996
# raw
# history blame
# 5.06 kB
import editdistance
import frontmatter
from hexdump2 import hexdump
import gradio as gr
import json
import shlex
import subprocess
import tempfile
# Text handed to gr.Interface(description=...) in run(): the ``content`` of
# README.md as parsed by python-frontmatter (presumably the body with the
# front-matter header stripped -- confirm against the library docs).
# Loaded once, at import time, relative to the current working directory.
description = frontmatter.load("README.md").content
def trim(str, n):
    """Return *str* without its leading lines, keeping ``lines[n:]``.

    The text is split with ``splitlines()`` and the remaining lines are
    joined back together with a single newline character between them.
    """
    # NOTE: the parameter shadows the builtin ``str``; the name is kept
    # because it is part of the function's signature.
    remaining_lines = str.splitlines()[n:]
    return "\n".join(remaining_lines)
def trim_objdump(str):
    """Return objdump output *str* with its first seven lines removed.

    Presumably those lines are the banner objdump prints before the first
    instruction -- the count is hard-coded, not detected.
    """
    skipped_line_count = 7
    return trim(str, n=skipped_line_count)
def disassemble_bytes(byte_data, architecture, options):
    """Disassemble raw machine code with ``objdump``.

    Args:
        byte_data: Bytes to disassemble; written to a scratch ``.bin`` file.
        architecture: Value passed to ``objdump -m``.
        options: Value passed to ``objdump -M``.

    Returns:
        objdump's standard output after ``trim_objdump`` has removed its
        leading lines.
    """
    # The scratch file lives in a private directory that is removed when the
    # ``with`` block exits, so calls no longer leave ``.bin`` files behind.
    with tempfile.TemporaryDirectory() as work_dir:
        # delete=False: objdump reopens the file by name after it is closed;
        # the enclosing directory is what cleans it up.
        with tempfile.NamedTemporaryFile(
            suffix=".bin", dir=work_dir, delete=False
        ) as temp_bin_file:
            temp_bin_file.write(byte_data)
            temp_bin_file_name = temp_bin_file.name

        # The file is closed (hence flushed) before objdump reads it.
        disassembly = subprocess.run(
            [
                "objdump",
                "-D",
                "-b",
                "binary",
                "-m",
                architecture,
                "-M",
                options,
                temp_bin_file_name,
            ],
            capture_output=True,
            text=True,
        ).stdout

    return trim_objdump(disassembly)
def compile(compiler, flags, source):
    """Compile source code to an object file and inspect the result.

    Args:
        compiler: Executable to run, invoked as ``compiler -c <file>.c ...``.
        flags: Extra command-line flags as one string, split with
            ``shlex.split``.
        source: Source text, written (encoded) to a scratch ``.c`` file.

    Returns:
        A 4-tuple ``(relocations, code_bytes, compiler_output, disassembly)``:

        * ``relocations`` -- parsed JSON printed by
          ``llvm-readobj-19 --relocations``.
        * ``code_bytes`` -- what ``objcopy -O binary`` extracts from the
          ``.text`` / ``.text.*`` sections.
        * ``compiler_output`` -- the compiler's stdout followed by its stderr.
        * ``disassembly`` -- ``objdump -d`` output trimmed by ``trim_objdump``.

        When the compiler exits with a non-zero status the tuple is
        ``(None, None, compiler_output, "")``.
    """
    # Every scratch file is created inside one private directory that is
    # removed on exit, so no .c/.o files are left behind after a call.
    with tempfile.TemporaryDirectory() as work_dir:
        # delete=False: the external tools reopen these files by name after
        # they are closed; the enclosing directory handles the cleanup.
        with tempfile.NamedTemporaryFile(
            suffix=".c", dir=work_dir, delete=False
        ) as temp_c_file:
            temp_c_file.write(source.encode())
            temp_c_file_name = temp_c_file.name

        with tempfile.NamedTemporaryFile(
            suffix=".o", dir=work_dir, delete=False
        ) as temp_o_file:
            temp_o_file_name = temp_o_file.name

        # NOTE(review): ``compiler`` and ``flags`` are supplied by the caller.
        # They are passed as an argv list (no shell), but the caller still
        # chooses which executable runs -- confirm that is acceptable for the
        # deployment.
        result = subprocess.run(
            [compiler, "-c", temp_c_file_name]
            + shlex.split(flags)
            + ["-o", temp_o_file_name],
            capture_output=True,
            text=True,
        )
        compile_output = result.stdout + result.stderr

        # Stop here on a failed compile: there is no usable object file for
        # the tools below, and json.loads would raise on empty output before
        # the failure could be reported to the caller.
        if result.returncode != 0:
            return None, None, compile_output, ""

        # Extract the raw bytes of the code sections.
        with tempfile.NamedTemporaryFile(
            suffix=".raw", dir=work_dir
        ) as raw_bytes_file:
            subprocess.run(
                [
                    "objcopy",
                    "--only-section",
                    ".text",
                    # XXX in reality we should probably look at the sections
                    "--only-section",
                    ".text.*",
                    "-O",
                    "binary",
                    temp_o_file_name,
                    raw_bytes_file.name,
                ]
            )
            compiled_bytes = raw_bytes_file.read()

        # Disassemble the object file.
        disassembly = subprocess.run(
            ["objdump", "-d", temp_o_file_name],
            capture_output=True,
            text=True,
        ).stdout
        disassembly = trim_objdump(disassembly)

        # Relocations, as structured JSON rather than objdump's text table.
        json_relocs = subprocess.run(
            [
                "llvm-readobj-19",
                "--elf-output-style=JSON",
                "--relocations",
                temp_o_file_name,
            ],
            capture_output=True,
            text=True,
        ).stdout
        json_relocs = json.loads(json_relocs)

    return json_relocs, compiled_bytes, compile_output, disassembly
def predict(target_bytes, source, compiler, flags, disasm_arch, disasm_options):
    """Compile *source* and compare the result with the target bytes.

    Args:
        target_bytes: Target machine code as a hex string
            (parsed with ``bytes.fromhex``).
        source: Source code handed to ``compile``.
        compiler: Compiler executable handed to ``compile``.
        flags: Compiler flags handed to ``compile``.
        disasm_arch: Architecture handed to ``disassemble_bytes``.
        disasm_options: Disassembler options handed to ``disassemble_bytes``.

    Returns:
        A 7-tuple: hexdump of the compiled bytes (or ``"Compilation failed"``),
        hexdump of the target bytes, edit distance between the two byte
        strings (``-1`` when compilation failed), compiler output, compiled
        disassembly, compiled relocations, target disassembly.
    """
    target_bytes = bytes.fromhex(target_bytes)
    relocations, code_bytes, compiler_log, code_listing = compile(
        compiler, flags, source
    )
    target_listing = disassemble_bytes(target_bytes, disasm_arch, disasm_options)

    if code_bytes is None:
        # ``compile`` signals failure by returning None for the bytes.
        code_dump = "Compilation failed"
        target_dump = hexdump(target_bytes, result="return")
        distance = -1
    else:
        code_dump = hexdump(code_bytes, result="return")
        target_dump = hexdump(target_bytes, result="return")
        distance = editdistance.eval(code_bytes, target_bytes)

    return (
        code_dump,
        target_dump,
        distance,
        compiler_log,
        code_listing,
        relocations,
        target_listing,
    )
def run():
    """Build the Gradio interface around ``predict`` and start serving it.

    The input widgets are listed in the order of ``predict``'s parameters and
    the output widgets in the order of the tuple it returns.
    """
    target_box = gr.Textbox(
        lines=10,
        label="Bytes of Target Function (in hex)",
        value="b8 2a 00 00 00 c3",
    )
    source_box = gr.Textbox(
        lines=10,
        label="Decompiled C Source Code",
        value="int foo() { return 0; }",
    )
    input_widgets = [
        target_box,
        source_box,
        gr.Textbox(label="Compiler", value="g++"),
        gr.Textbox(label="Compiler Flags", value="-O2"),
        gr.Textbox(label="Architecture (objdump -m)", value="i386"),
        gr.Textbox(label="Disassembler options (objdump -M)", value="x86-64"),
    ]

    output_widgets = [
        gr.Textbox(label="Compiled bytes"),
        gr.Textbox(label="Target bytes"),
        gr.Number(label="Edit distance (lower is better)"),
        gr.Textbox(label="Compiler Output"),
        gr.Textbox(label="Compiled Disassembly"),
        gr.JSON(label="Compiled relocations", open=True),
        gr.Textbox(label="Target Disassembly"),
    ]

    demo = gr.Interface(
        fn=predict,
        description=description,
        inputs=input_widgets,
        outputs=output_widgets,
    )
    # Listen on all interfaces, port 7860, and surface errors in the UI.
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
# Start the app. This call is unguarded, so it runs whenever the module is
# executed or imported.
run()