Spaces:
Sleeping
Sleeping
Commit
·
3c8598e
1
Parent(s):
135b99f
format
Browse files
main.py
CHANGED
@@ -11,26 +11,40 @@ from dist import levenshtein_with_wildcard, print_match_summary
|
|
11 |
|
12 |
# Load README.md through `frontmatter` and keep only its `.content`.
# NOTE(review): presumably this is the Markdown body without the front-matter
# header, used as the app's description text — confirm where it is consumed.
description = frontmatter.load("README.md").content
|
13 |
|
|
|
14 |
def trim(str, n):
    """Return ``str`` with its first ``n`` lines removed.

    The text is split with ``splitlines()`` and the remaining lines are
    re-joined with a single newline character, so a trailing newline in the
    input is dropped and other line terminators (for example CRLF) are
    normalised to a plain newline.

    Args:
        str: Text to trim. The name shadows the builtin but is kept so that
            existing keyword callers keep working.
        n: Number of leading lines to discard.

    Returns:
        The remaining lines joined by newlines. For a non-negative ``n`` this
        is the empty string when the input has ``n`` lines or fewer.
    """
    return "\n".join(str.splitlines()[n:])
|
16 |
|
|
|
17 |
def trim_objdump(str):
    """Return objdump output with its first seven lines removed.

    Args:
        str: Text captured from objdump's stdout. The name shadows the
            builtin but is kept for interface compatibility.

    Returns:
        The text after the first seven lines, re-joined by ``trim``.
    """
    # NOTE(review): 7 is presumably the length of the banner objdump prints
    # before the first instruction (file format / section / symbol headers);
    # verify against the objdump version and flags actually in use.
    return trim(str, 7)
|
19 |
|
|
|
20 |
def disassemble_bytes(byte_data, architecture, options):
|
21 |
with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as temp_bin_file:
|
22 |
temp_bin_file.write(byte_data)
|
23 |
temp_bin_file_name = temp_bin_file.name
|
24 |
|
25 |
disassembly = subprocess.run(
|
26 |
-
[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
capture_output=True,
|
28 |
-
text=True
|
29 |
).stdout
|
30 |
disassembly = trim_objdump(disassembly)
|
31 |
|
32 |
return disassembly
|
33 |
|
|
|
34 |
def compile(compiler, flags, source):
|
35 |
# Create a temporary file for the C source code
|
36 |
with tempfile.NamedTemporaryFile(suffix=".c", delete=False) as temp_c_file:
|
@@ -71,9 +85,7 @@ def compile(compiler, flags, source):
|
|
71 |
|
72 |
# Disassemble the object file
|
73 |
disassembly = subprocess.run(
|
74 |
-
["objdump", "-dr", temp_o_file_name],
|
75 |
-
capture_output=True,
|
76 |
-
text=True
|
77 |
).stdout
|
78 |
disassembly = trim_objdump(disassembly)
|
79 |
|
@@ -86,23 +98,27 @@ def compile(compiler, flags, source):
|
|
86 |
# relocs = trim(relocs, 3)
|
87 |
|
88 |
json_relocs = subprocess.run(
|
89 |
-
[
|
|
|
|
|
|
|
|
|
|
|
90 |
capture_output=True,
|
91 |
text=True,
|
92 |
).stdout
|
93 |
json_relocs = json.loads(json_relocs)
|
94 |
json_relocs = json_relocs[0]["Relocations"]
|
95 |
-
json_relocs = [r["Relocation"] for d in json_relocs for r in d[
|
96 |
# Filter out .text
|
97 |
json_relocs = [r for r in json_relocs if r["Symbol"]["Name"] != ".text"]
|
98 |
|
99 |
-
|
100 |
-
|
101 |
if result.returncode == 0:
|
102 |
return json_relocs, compiled_bytes, compile_output, disassembly
|
103 |
else:
|
104 |
return None, None, compile_output, disassembly
|
105 |
|
|
|
106 |
def _reloc_type2size(s):
|
107 |
match s:
|
108 |
case "R_X86_64_PC32":
|
@@ -112,20 +128,31 @@ def _reloc_type2size(s):
|
|
112 |
case _:
|
113 |
assert False, f"Unknown reloc {s}"
|
114 |
|
|
|
115 |
def _compute_relocs_byte_range(json_relocs):
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
|
|
|
|
|
|
|
|
120 |
|
121 |
def predict(target_bytes, source, compiler, flags, disasm_arch, disasm_options):
|
122 |
target_bytes = bytes.fromhex(target_bytes)
|
123 |
-
compiled_relocs, compiled_bytes, compile_output, compiled_disassembly = compile(
|
|
|
|
|
124 |
target_disassembly = disassemble_bytes(target_bytes, disasm_arch, disasm_options)
|
125 |
|
126 |
if compiled_bytes is not None:
|
127 |
|
128 |
-
reloc_edit_distance, reloc_operations = print_match_summary(
|
|
|
|
|
|
|
|
|
129 |
print(f"reloc_edit_distance: {reloc_edit_distance}")
|
130 |
print(f"reloc operations: {reloc_operations}")
|
131 |
|
@@ -138,7 +165,7 @@ def predict(target_bytes, source, compiler, flags, disasm_arch, disasm_options):
|
|
138 |
compile_output,
|
139 |
compiled_disassembly,
|
140 |
compiled_relocs,
|
141 |
-
target_disassembly
|
142 |
)
|
143 |
else:
|
144 |
return (
|
@@ -150,7 +177,7 @@ def predict(target_bytes, source, compiler, flags, disasm_arch, disasm_options):
|
|
150 |
compile_output,
|
151 |
compiled_disassembly,
|
152 |
compiled_relocs,
|
153 |
-
target_disassembly
|
154 |
)
|
155 |
|
156 |
|
@@ -172,7 +199,7 @@ def run():
|
|
172 |
gr.Textbox(label="Compiler", value="g++"),
|
173 |
gr.Textbox(label="Compiler Flags", value="-O2"),
|
174 |
gr.Textbox(label="Architecture (objdump -m)", value="i386"),
|
175 |
-
gr.Textbox(label="Disassembler options (objdump -M)", value="x86-64")
|
176 |
],
|
177 |
outputs=[
|
178 |
gr.Textbox(label="Compiled bytes"),
|
|
|
11 |
|
12 |
# Load README.md through `frontmatter` and keep only its `.content`.
# NOTE(review): presumably this is the Markdown body without the front-matter
# header, used as the app's description text — confirm where it is consumed.
description = frontmatter.load("README.md").content
|
13 |
|
14 |
+
|
15 |
def trim(str, n):
    """Return ``str`` with its first ``n`` lines removed.

    The text is split with ``splitlines()`` and the remaining lines are
    re-joined with a single newline character, so a trailing newline in the
    input is dropped and other line terminators (for example CRLF) are
    normalised to a plain newline.

    Args:
        str: Text to trim. The name shadows the builtin but is kept so that
            existing keyword callers keep working.
        n: Number of leading lines to discard.

    Returns:
        The remaining lines joined by newlines. For a non-negative ``n`` this
        is the empty string when the input has ``n`` lines or fewer.
    """
    return "\n".join(str.splitlines()[n:])
|
17 |
|
18 |
+
|
19 |
def trim_objdump(str):
    """Return objdump output with its first seven lines removed.

    Args:
        str: Text captured from objdump's stdout. The name shadows the
            builtin but is kept for interface compatibility.

    Returns:
        The text after the first seven lines, re-joined by ``trim``.
    """
    # NOTE(review): 7 is presumably the length of the banner objdump prints
    # before the first instruction (file format / section / symbol headers);
    # verify against the objdump version and flags actually in use.
    return trim(str, 7)
|
21 |
|
22 |
+
|
23 |
def disassemble_bytes(byte_data, architecture, options):
    """Disassemble raw machine code with objdump and return the listing.

    The bytes are written to a temporary ``.bin`` file, which objdump then
    reads as a flat binary (``-D -b binary``). The temporary file is removed
    before returning, whether or not objdump could be run.

    Args:
        byte_data: Raw bytes to disassemble.
        architecture: Value passed to ``objdump -m``.
        options: Value passed to ``objdump -M``.

    Returns:
        objdump's stdout with its leading lines removed by ``trim_objdump``.
        objdump's exit status is not checked, so on failure the result is
        whatever (possibly nothing) was left on stdout.
    """
    # Local import so this block does not rely on a module-level `import os`.
    import os

    with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as temp_bin_file:
        temp_bin_file.write(byte_data)
        temp_bin_file_name = temp_bin_file.name

    # The file is closed (and therefore flushed) here, before objdump opens it.
    try:
        disassembly = subprocess.run(
            [
                "objdump",
                "-D",
                "-b",
                "binary",
                "-m",
                architecture,
                "-M",
                options,
                temp_bin_file_name,
            ],
            capture_output=True,
            text=True,
        ).stdout
    finally:
        # delete=False means nothing else cleans this file up; without the
        # unlink every call would leave a stray .bin in the temp directory.
        os.unlink(temp_bin_file_name)

    return trim_objdump(disassembly)
|
46 |
|
47 |
+
|
48 |
def compile(compiler, flags, source):
|
49 |
# Create a temporary file for the C source code
|
50 |
with tempfile.NamedTemporaryFile(suffix=".c", delete=False) as temp_c_file:
|
|
|
85 |
|
86 |
# Disassemble the object file
|
87 |
disassembly = subprocess.run(
|
88 |
+
["objdump", "-dr", temp_o_file_name], capture_output=True, text=True
|
|
|
|
|
89 |
).stdout
|
90 |
disassembly = trim_objdump(disassembly)
|
91 |
|
|
|
98 |
# relocs = trim(relocs, 3)
|
99 |
|
100 |
json_relocs = subprocess.run(
|
101 |
+
[
|
102 |
+
"llvm-readobj-19",
|
103 |
+
"--elf-output-style=JSON",
|
104 |
+
"--relocations",
|
105 |
+
temp_o_file_name,
|
106 |
+
],
|
107 |
capture_output=True,
|
108 |
text=True,
|
109 |
).stdout
|
110 |
json_relocs = json.loads(json_relocs)
|
111 |
json_relocs = json_relocs[0]["Relocations"]
|
112 |
+
json_relocs = [r["Relocation"] for d in json_relocs for r in d["Relocs"]]
|
113 |
# Filter out .text
|
114 |
json_relocs = [r for r in json_relocs if r["Symbol"]["Name"] != ".text"]
|
115 |
|
|
|
|
|
116 |
if result.returncode == 0:
|
117 |
return json_relocs, compiled_bytes, compile_output, disassembly
|
118 |
else:
|
119 |
return None, None, compile_output, disassembly
|
120 |
|
121 |
+
|
122 |
def _reloc_type2size(s):
|
123 |
match s:
|
124 |
case "R_X86_64_PC32":
|
|
|
128 |
case _:
|
129 |
assert False, f"Unknown reloc {s}"
|
130 |
|
131 |
+
|
132 |
def _compute_relocs_byte_range(json_relocs):
    """Return every byte offset covered by the given relocations, flattened.

    Args:
        json_relocs: Iterable of relocation dicts. Each must provide an
            integer ``"Offset"`` and a ``"Type"`` dict with a ``"Name"`` key;
            the name is passed to ``_reloc_type2size`` to get the width.

    Returns:
        A flat list of ints. Each relocation contributes the offsets from
        ``Offset`` up to, but not including, ``Offset + size``, in input
        order. Overlapping relocations produce duplicate offsets.
    """
    return [
        byte_offset
        for reloc in json_relocs
        for byte_offset in range(
            reloc["Offset"],
            reloc["Offset"] + _reloc_type2size(reloc["Type"]["Name"]),
        )
    ]
|
140 |
+
|
141 |
|
142 |
def predict(target_bytes, source, compiler, flags, disasm_arch, disasm_options):
|
143 |
target_bytes = bytes.fromhex(target_bytes)
|
144 |
+
compiled_relocs, compiled_bytes, compile_output, compiled_disassembly = compile(
|
145 |
+
compiler, flags, source
|
146 |
+
)
|
147 |
target_disassembly = disassemble_bytes(target_bytes, disasm_arch, disasm_options)
|
148 |
|
149 |
if compiled_bytes is not None:
|
150 |
|
151 |
+
reloc_edit_distance, reloc_operations = print_match_summary(
|
152 |
+
target_bytes,
|
153 |
+
compiled_bytes,
|
154 |
+
wildcard_offsets_seq2=_compute_relocs_byte_range(compiled_relocs),
|
155 |
+
)
|
156 |
print(f"reloc_edit_distance: {reloc_edit_distance}")
|
157 |
print(f"reloc operations: {reloc_operations}")
|
158 |
|
|
|
165 |
compile_output,
|
166 |
compiled_disassembly,
|
167 |
compiled_relocs,
|
168 |
+
target_disassembly,
|
169 |
)
|
170 |
else:
|
171 |
return (
|
|
|
177 |
compile_output,
|
178 |
compiled_disassembly,
|
179 |
compiled_relocs,
|
180 |
+
target_disassembly,
|
181 |
)
|
182 |
|
183 |
|
|
|
199 |
gr.Textbox(label="Compiler", value="g++"),
|
200 |
gr.Textbox(label="Compiler Flags", value="-O2"),
|
201 |
gr.Textbox(label="Architecture (objdump -m)", value="i386"),
|
202 |
+
gr.Textbox(label="Disassembler options (objdump -M)", value="x86-64"),
|
203 |
],
|
204 |
outputs=[
|
205 |
gr.Textbox(label="Compiled bytes"),
|