File size: 6,587 Bytes
94a508d
5f0a407
70ebd4a
66f8fc1
1defe4d
20b2b87
b4c7402
 
 
fc43ad5
52b18ab
5f0a407
e98177c
3c8598e
a2767e5
80ffc07
a2767e5
3c8598e
a2767e5
567f66d
a2767e5
3c8598e
812a13e
a2767e5
 
 
 
 
3c8598e
 
 
 
 
 
 
 
 
 
 
a2767e5
3c8598e
a2767e5
 
 
 
e98177c
3c8598e
20b2b87
b4c7402
 
 
 
e98177c
b4c7402
 
 
e98177c
b4c7402
5a649d5
d65ea20
 
 
5a649d5
d65ea20
029945b
5a649d5
e98177c
ce7ca57
6cdaa1a
ce7ca57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c8598e
 
 
 
 
 
1defe4d
 
ce7ca57
 
 
 
 
 
b639ecc
d65ea20
ce7ca57
ff724df
3c8598e
f6ba557
 
 
 
 
 
 
 
 
3c8598e
f6ba557
3c8598e
 
 
 
 
 
 
 
f6ba557
812a13e
94a508d
3c8598e
 
 
42886c0
d65ea20
 
f6ba557
3c8598e
 
 
 
 
f6ba557
 
 
d65ea20
 
 
 
523d28e
b034509
d65ea20
ff724df
6cdaa1a
3c8598e
d65ea20
 
 
 
 
 
523d28e
 
d65ea20
ff724df
6cdaa1a
3c8598e
d65ea20
66f8fc1
 
329f9c0
 
 
e98177c
41e9ae6
94a508d
 
 
 
 
 
 
 
1d30af4
94a508d
2901d44
20f12de
812a13e
3c8598e
41e9ae6
029945b
 
 
a71a75a
523d28e
 
5a649d5
ff724df
a74a996
ff724df
029945b
329f9c0
66f8fc1
22568e3
720784d
e98177c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
import editdistance
import frontmatter
from hexdump2 import hexdump
import gradio as gr
import json
import shlex
import subprocess
import tempfile

from dist import levenshtein_with_wildcard, print_match_summary

# Text shown as the interface description: the ``content`` attribute of
# README.md as parsed by ``frontmatter`` (presumably the body with the front
# matter removed -- confirm against the python-frontmatter docs).  The path is
# relative, so it is resolved against the current working directory at import
# time.
description = frontmatter.load("README.md").content


def trim(str, n):
    """Return ``str`` with its first ``n`` lines removed.

    The text is split with ``splitlines()`` and the remaining lines are joined
    back together with ``"\\n"``, so whatever line terminators the input used
    are replaced by plain newlines between lines.
    """
    kept_lines = str.splitlines()[n:]
    return "\n".join(kept_lines)


def trim_objdump(str):
    """Strip the leading seven lines from objdump output.

    NOTE(review): seven presumably matches the banner objdump prints before
    the listing itself -- confirm against the objdump version in use.
    """
    header_line_count = 7
    return trim(str, header_line_count)


def disassemble_bytes(byte_data, architecture, options):
    """Disassemble raw machine code with ``objdump`` and return the listing.

    Args:
        byte_data: Raw bytes to disassemble.
        architecture: Value passed to ``objdump -m``.
        options: Value passed to ``objdump -M``.

    Returns:
        objdump's stdout with its leading lines removed by ``trim_objdump``.
    """
    # The temporary file deletes itself when the ``with`` block exits (also on
    # error), so repeated calls do not accumulate ``.bin`` files on disk.
    with tempfile.NamedTemporaryFile(suffix=".bin") as temp_bin_file:
        temp_bin_file.write(byte_data)
        # objdump opens the file by name in another process, so the buffered
        # bytes must reach the file before it runs.
        temp_bin_file.flush()

        disassembly = subprocess.run(
            [
                "objdump",
                "-D",
                "-b",
                "binary",
                "-m",
                architecture,
                "-M",
                options,
                temp_bin_file.name,
            ],
            capture_output=True,
            text=True,
        ).stdout

    return trim_objdump(disassembly)


def compile(compiler, flags, source):
    """Compile source code and extract code bytes, disassembly and relocations.

    Args:
        compiler: Compiler executable to invoke (for example ``"g++"``).
        flags: Extra command-line flags as a single string; it is split with
            ``shlex.split`` before being passed to the compiler.
        source: Source text; it is written to a temporary ``.c`` file.

    Returns:
        A 4-tuple ``(relocs, compiled_bytes, compile_output, disassembly)``.
        ``compile_output`` is the compiler's stdout followed by its stderr.
        When the compiler exits with a non-zero status the other three entries
        are ``None``.  Otherwise ``relocs`` is the list of relocation records
        reported by ``llvm-readobj-19`` (excluding those whose symbol is named
        ``.text``), ``compiled_bytes`` is the binary content objcopy emits for
        the ``.text`` / ``.text.*`` sections, and ``disassembly`` is the
        trimmed ``objdump -dr`` listing.
    """
    # Every intermediate file lives in one scratch directory that is removed
    # when the block exits, so neither the success path nor the failure path
    # leaves files behind.
    with tempfile.TemporaryDirectory() as scratch_dir:
        # Create a temporary file for the C source code
        with tempfile.NamedTemporaryFile(
            suffix=".c", dir=scratch_dir, delete=False
        ) as temp_c_file:
            temp_c_file.write(source.encode())
            temp_c_file_name = temp_c_file.name

        # Reserve a file name for the object file
        with tempfile.NamedTemporaryFile(
            suffix=".o", dir=scratch_dir, delete=False
        ) as temp_o_file:
            temp_o_file_name = temp_o_file.name

        # Compile the C file to an object file
        result = subprocess.run(
            [compiler, "-c", temp_c_file_name]
            + shlex.split(flags)
            + ["-o", temp_o_file_name],
            capture_output=True,
            text=True,
        )
        compile_output = result.stdout + result.stderr

        if result.returncode != 0:
            return None, None, compile_output, None

        # Reserve a file name for the raw bytes
        with tempfile.NamedTemporaryFile(
            suffix=".raw", dir=scratch_dir, delete=False
        ) as raw_bytes_file:
            raw_bytes_file_name = raw_bytes_file.name

        subprocess.run(
            [
                "objcopy",
                "--only-section",
                ".text",
                # XXX in reality we should probably look at the sections
                "--only-section",
                ".text.*",
                "-O",
                "binary",
                temp_o_file_name,
                raw_bytes_file_name,
            ]
        )
        # Open the output by name only after objcopy has finished, so the
        # bytes read are the ones it wrote even if it replaced the file.
        with open(raw_bytes_file_name, "rb") as raw_bytes_file:
            compiled_bytes = raw_bytes_file.read()

        # Disassemble the object file
        disassembly = subprocess.run(
            ["objdump", "-dr", temp_o_file_name], capture_output=True, text=True
        ).stdout
        disassembly = trim_objdump(disassembly)

        # Relocs
        json_relocs = subprocess.run(
            [
                "llvm-readobj-19",
                "--elf-output-style=JSON",
                "--relocations",
                temp_o_file_name,
            ],
            capture_output=True,
            text=True,
        ).stdout
        json_relocs = json.loads(json_relocs)
        json_relocs = json_relocs[0]["Relocations"]
        json_relocs = [r["Relocation"] for d in json_relocs for r in d["Relocs"]]
        # Filter out .text
        json_relocs = [r for r in json_relocs if r["Symbol"]["Name"] != ".text"]
        return json_relocs, compiled_bytes, compile_output, disassembly


# Width in bytes of the field patched by each supported x86-64 relocation
# type (field widths as listed in the System V x86-64 psABI relocation table).
_RELOC_TYPE_SIZES = {
    "R_X86_64_8": 1,
    "R_X86_64_PC8": 1,
    "R_X86_64_16": 2,
    "R_X86_64_PC16": 2,
    "R_X86_64_32": 4,
    "R_X86_64_32S": 4,
    "R_X86_64_PC32": 4,
    "R_X86_64_PLT32": 4,
    "R_X86_64_GOTPCREL": 4,
    "R_X86_64_GOTPCRELX": 4,
    "R_X86_64_REX_GOTPCRELX": 4,
    "R_X86_64_GOTTPOFF": 4,
    "R_X86_64_TPOFF32": 4,
    "R_X86_64_64": 8,
    "R_X86_64_PC64": 8,
}


def _reloc_type2size(s):
    """Return the number of bytes covered by relocation type name ``s``.

    Args:
        s: Relocation type name, e.g. ``"R_X86_64_PC32"``.

    Returns:
        The width in bytes of the relocated field.

    Raises:
        AssertionError: If ``s`` is not a known relocation type.
    """
    size = _RELOC_TYPE_SIZES.get(s)
    if size is None:
        # Raised explicitly rather than via ``assert`` so the check is not
        # stripped when Python runs with -O.
        raise AssertionError(f"Unknown reloc {s}")
    return size


def _compute_relocs_byte_range(json_relocs):
    """Return every byte offset covered by the given relocations.

    Each record contributes the offsets from its ``"Offset"`` value up to, but
    not including, that value plus the size ``_reloc_type2size`` reports for
    its ``["Type"]["Name"]``.  The offsets of all records are returned as one
    flat list, in record order.
    """
    covered_offsets = []
    for reloc in json_relocs:
        first = reloc["Offset"]
        width = _reloc_type2size(reloc["Type"]["Name"])
        covered_offsets.extend(range(first, first + width))
    return covered_offsets


def predict(target_bytes, source, compiler, flags, disasm_arch, disasm_options):
    """Compile ``source`` and compare the resulting bytes with the target.

    Args:
        target_bytes: Target function bytes as a hex string (parsed with
            ``bytes.fromhex``).
        source: Source code handed to ``compile``.
        compiler: Compiler executable handed to ``compile``.
        flags: Compiler flags string handed to ``compile``.
        disasm_arch: Architecture handed to ``disassemble_bytes``.
        disasm_options: Disassembler options handed to ``disassemble_bytes``.

    Returns:
        A 9-tuple: compiled-bytes hexdump, target-bytes hexdump, plain edit
        distance, edit distance with relocation bytes as wildcards, the
        newline-joined edit operations, compiler output, compiled disassembly,
        compiled relocations, and target disassembly.  When compilation fails
        the first entry is ``"Compilation failed"``, the plain distance is
        ``-1`` and the two relocation-aware entries are ``None``.
    """
    target_bytes = bytes.fromhex(target_bytes)
    compile_result = compile(compiler, flags, source)
    compiled_relocs, compiled_bytes, compile_output, compiled_disassembly = (
        compile_result
    )
    target_disassembly = disassemble_bytes(target_bytes, disasm_arch, disasm_options)

    # Failure path first: there are no compiled bytes to compare against.
    if compiled_bytes is None:
        return (
            "Compilation failed",
            hexdump(target_bytes, result="return"),
            -1,
            None,
            None,
            compile_output,
            compiled_disassembly,
            compiled_relocs,
            target_disassembly,
        )

    wildcard_offsets = _compute_relocs_byte_range(compiled_relocs)
    reloc_edit_distance, reloc_operations = print_match_summary(
        target_bytes,
        compiled_bytes,
        wildcard_offsets_seq2=wildcard_offsets,
    )
    print(f"reloc_edit_distance: {reloc_edit_distance}")
    print(f"reloc operations: {reloc_operations}")

    compiled_dump = hexdump(compiled_bytes, result="return")
    target_dump = hexdump(target_bytes, result="return")
    plain_edit_distance = editdistance.eval(compiled_bytes, target_bytes)
    operations_text = "\n".join(reloc_operations)

    return (
        compiled_dump,
        target_dump,
        plain_edit_distance,
        reloc_edit_distance,
        operations_text,
        compile_output,
        compiled_disassembly,
        compiled_relocs,
        target_disassembly,
    )


def run():
    """Build the Gradio interface around ``predict`` and launch the server.

    The six input widgets correspond, in order, to ``predict``'s parameters
    and the nine output widgets to the entries of the tuple it returns.  The
    server is started on ``0.0.0.0`` port 7860 with ``show_error=True``.
    """
    input_widgets = [
        gr.Textbox(
            lines=10,
            label="Bytes of Target Function (in hex)",
            value="b8 2a 00 00 00 c3",
        ),
        gr.Textbox(
            lines=10,
            label="Decompiled C Source Code",
            value="int x;\nint foo() { return x; }",
        ),
        gr.Textbox(label="Compiler", value="g++"),
        gr.Textbox(label="Compiler Flags", value="-O2"),
        gr.Textbox(label="Architecture (objdump -m)", value="i386"),
        gr.Textbox(label="Disassembler options (objdump -M)", value="x86-64"),
    ]
    output_widgets = [
        gr.Textbox(label="Compiled bytes"),
        gr.Textbox(label="Target bytes"),
        gr.Number(label="Edit distance (lower is better)"),
        gr.Number(label="Edit distance (ignoring relocs; lower is better)"),
        gr.Textbox(label="Edit description (ignoring relocs)"),
        gr.Textbox(label="Compiler Output"),
        gr.Textbox(label="Compiled Disassembly"),
        gr.JSON(label="Compiled relocations", open=True),
        gr.Textbox(label="Target Disassembly"),
    ]

    interface = gr.Interface(
        fn=predict,
        description=description,
        inputs=input_widgets,
        outputs=output_widgets,
    )
    interface.launch(server_name="0.0.0.0", server_port=7860, show_error=True)


# Build and launch the UI as soon as this file is executed.
# NOTE(review): there is no ``if __name__ == "__main__"`` guard, so importing
# this module also starts the server -- confirm that is intended.
run()