File size: 2,624 Bytes
94a508d
70ebd4a
66f8fc1
20b2b87
b4c7402
 
 
e98177c
 
 
e9721d6
 
 
 
e98177c
 
 
20b2b87
b4c7402
 
 
 
e98177c
b4c7402
 
 
e98177c
b4c7402
029945b
 
 
 
 
 
e98177c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36c1bbc
e98177c
b4c7402
 
e98177c
94a508d
 
c6b1bca
94a508d
 
 
747e005
94a508d
66f8fc1
 
329f9c0
 
 
e98177c
41e9ae6
94a508d
 
 
 
 
 
 
 
 
 
2901d44
20f12de
41e9ae6
029945b
 
 
a71a75a
029945b
329f9c0
66f8fc1
329f9c0
720784d
e98177c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import editdistance
from hexdump2 import hexdump
import gradio as gr
import shlex
import subprocess
import tempfile

# Text shown on the Gradio page; passed as `description=` to gr.Interface in run().
description = (
    "This is a space testing a method for evaluating the quality of decompilation.\n"
    "\n"
    "Currently unhandled features:\n"
    "* PIC stuff\n"
    "  * Global references\n"
    "  * Function calls\n"
    "  * Wildcards in target function?\n"
)


def compile(compiler, flags, source):
    """Compile C source and return the raw bytes of its ``.text`` section.

    The source is written to a scratch ``.c`` file, compiled to an object
    file with ``compiler -c <file> <flags...> -o <object>``, and the ``.text``
    section is then extracted as a flat binary with ``objcopy``.

    All intermediate files live in a private temporary directory that is
    removed when the function returns or raises, so nothing is left behind.

    NOTE(security): ``compiler`` is executed as given and ``flags`` are
    forwarded to it verbatim (after ``shlex.split``). No shell is involved,
    but a caller that passes untrusted values lets the user choose which
    program runs. Restrict these upstream if that matters.

    Args:
        compiler: Name or path of the compiler executable (e.g. ``"g++"``).
        flags: Extra command-line flags as a single shell-style string.
        source: C source code to compile.

    Returns:
        The contents of the object file's ``.text`` section as ``bytes``
        (empty if ``objcopy`` produced no output file).

    Raises:
        subprocess.CalledProcessError: If the compiler or ``objcopy`` exits
            with a non-zero status.
        FileNotFoundError: If the compiler or ``objcopy`` executable cannot
            be found.
    """
    with tempfile.TemporaryDirectory() as work_dir:
        # Write the C source into the scratch directory; delete=False only
        # keeps the file around after close -- the directory cleanup removes it.
        with tempfile.NamedTemporaryFile(
            suffix=".c", dir=work_dir, delete=False
        ) as c_file:
            c_file.write(source.encode())
        c_path = c_file.name

        # The source name is unique inside a private directory, so sibling
        # names derived from it cannot collide with anything else.
        o_path = f"{c_path}.o"
        raw_path = f"{c_path}.raw"

        # Compile the C file to an object file
        subprocess.run(
            [compiler, "-c", c_path] + shlex.split(flags) + ["-o", o_path],
            check=True,
        )

        # Extract only the machine code. check=True so that a failed
        # extraction is reported instead of being mistaken for empty output.
        subprocess.run(
            [
                "objcopy",
                "--only-section",
                ".text",
                "-O",
                "binary",
                o_path,
                raw_path,
            ],
            check=True,
        )

        # Re-open by path after objcopy has finished rather than relying on a
        # handle opened before the file was (re)written.
        try:
            with open(raw_path, "rb") as raw_file:
                compiled_bytes = raw_file.read()
        except FileNotFoundError:
            # Defensive: treat a missing output file as "no bytes", matching
            # the empty result the pre-created temp file used to give.
            compiled_bytes = b""
        print(compiled_bytes)

    return compiled_bytes


def predict(target_bytes, source, compiler, flags):
    """Compile ``source`` and compare the result with the target bytes.

    Args:
        target_bytes: Hex string of the target function's bytes; parsed with
            ``bytes.fromhex``.
        source: C source code to compile.
        compiler: Compiler executable handed to ``compile``.
        flags: Compiler flags string handed to ``compile``.

    Returns:
        A 3-tuple of (hexdump of the compiled bytes, hexdump of the target
        bytes, edit distance between the two byte sequences).
    """
    # Parse the target first so malformed hex is rejected before compiling.
    expected = bytes.fromhex(target_bytes)
    produced = compile(compiler, flags, source)

    produced_dump = hexdump(produced, result="return")
    expected_dump = hexdump(expected, result="return")
    distance = editdistance.eval(produced, expected)

    return produced_dump, expected_dump, distance


def run():
    """Build the Gradio interface around ``predict`` and launch it.

    The form has four text inputs (target bytes in hex, C source, compiler,
    compiler flags) and three outputs (two text dumps and the edit distance).
    The server is launched on address 0.0.0.0, port 7860.
    """
    # Inputs, in the positional order predict() expects them.
    target_box = gr.Textbox(
        lines=10,
        label="Bytes of Target Function (in hex)",
        value="b8 2a 00 00 00 c3",
    )
    source_box = gr.Textbox(
        lines=10,
        label="Decompiled C Source Code",
        value="int foo() { return 42; }",
    )
    compiler_box = gr.Textbox(label="Compiler", value="g++")
    flags_box = gr.Textbox(label="Compiler Flags", value="-O2")
    input_widgets = [target_box, source_box, compiler_box, flags_box]

    # Outputs, in the order of the tuple predict() returns.
    output_widgets = [
        gr.Textbox(label="Compiled bytes"),
        gr.Textbox(label="Target bytes"),
        gr.Number(label="Edit distance (lower is better)"),
    ]

    demo = gr.Interface(
        fn=predict,
        description=description,
        inputs=input_widgets,
        outputs=output_widgets,
    )

    demo.launch(server_name="0.0.0.0", server_port=7860)


# Build and launch the interface as soon as this module is executed
# (there is no `if __name__ == "__main__":` guard, so importing it also starts the server).
run()