"""Gradio demo that scores decompiled C source against target machine code.

The submitted source is compiled, the raw ``.text`` bytes are extracted with
``objcopy``, and the edit distance to the target bytes is reported.
"""

import shlex
import subprocess
import tempfile
from pathlib import Path

import editdistance
import gradio as gr
from hexdump2 import hexdump

description = """This is a space testing a method for evaluating the quality of decompilation.

Currently unhandled features:
* PIC stuff
* Global references
* Function calls
* Wildcards in target function?
"""


def compile(compiler, flags, source):  # noqa: A001 - public name kept for callers
    """Compile C source and return the raw bytes of its ``.text`` section.

    Args:
        compiler: Executable to invoke (for example ``"g++"``). It is run
            directly, without a shell.
        flags: Extra compiler flags as one string; split with ``shlex.split``.
        source: C source code to compile.

    Returns:
        The contents of the object file's ``.text`` section as ``bytes``
        (empty if ``objcopy`` produced no output file).

    Raises:
        subprocess.CalledProcessError: If the compiler or ``objcopy`` exits
            with a non-zero status.
        FileNotFoundError: If the compiler or ``objcopy`` is not installed.
    """
    # Keep every intermediate file in one scratch directory so that all of
    # them are removed on exit, including when a subprocess fails.
    with tempfile.TemporaryDirectory() as scratch:
        scratch_dir = Path(scratch)
        c_path = scratch_dir / "source.c"
        obj_path = scratch_dir / "source.o"
        raw_path = scratch_dir / "source.raw"

        c_path.write_bytes(source.encode())

        # Compile the C file to an object file.
        subprocess.run(
            [compiler, "-c", str(c_path)]
            + shlex.split(flags)
            + ["-o", str(obj_path)],
            check=True,
        )

        # Dump only the .text section as a flat binary.
        subprocess.run(
            [
                "objcopy",
                "--only-section",
                ".text",
                "-O",
                "binary",
                str(obj_path),
                str(raw_path),
            ],
            check=True,
        )

        # Read by path rather than through a pre-opened handle: this does not
        # depend on whether objcopy rewrites the file in place or replaces it.
        compiled_bytes = raw_path.read_bytes() if raw_path.exists() else b""

    print(compiled_bytes)
    return compiled_bytes


def predict(target_bytes, source, compiler, flags):
    """Compile ``source`` and compare the result with the target bytes.

    Args:
        target_bytes: Target function bytes as a hex string; whitespace
            between bytes is accepted by ``bytes.fromhex``.
        source: Decompiled C source code.
        compiler: Compiler executable passed through to ``compile``.
        flags: Compiler flags passed through to ``compile``.

    Returns:
        A tuple ``(compiled_hexdump, target_hexdump, distance)`` where
        ``distance`` is the edit distance between the two byte strings.

    Raises:
        ValueError: If ``target_bytes`` is not valid hex.
        subprocess.CalledProcessError: If compilation or extraction fails.
    """
    target_bytes = bytes.fromhex(target_bytes)
    compiled_bytes = compile(compiler, flags, source)

    return (
        hexdump(compiled_bytes, result="return"),
        hexdump(target_bytes, result="return"),
        editdistance.eval(compiled_bytes, target_bytes),
    )


def run():
    """Build the Gradio interface and serve it on 0.0.0.0:7860."""
    # NOTE(security): the "Compiler" and "Compiler Flags" boxes are passed to
    # subprocess as-is, so any visitor can run an arbitrary program on the
    # host. Only expose this in a sandboxed environment.
    demo = gr.Interface(
        fn=predict,
        description=description,
        inputs=[
            gr.Textbox(
                lines=10,
                label="Bytes of Target Function (in hex)",
                value="b8 2a 00 00 00 c3",
            ),
            gr.Textbox(
                lines=10,
                label="Decompiled C Source Code",
                value="int foo() { return 0; }",
            ),
            gr.Textbox(label="Compiler", value="g++"),
            gr.Textbox(label="Compiler Flags", value="-O2"),
        ],
        outputs=[
            gr.Textbox(label="Compiled bytes"),
            gr.Textbox(label="Target bytes"),
            gr.Number(label="Edit distance (lower is better)"),
        ],
    )
    demo.launch(server_name="0.0.0.0", server_port=7860)


# Launched unconditionally at import time, as in the original script.
run()