ejschwartz committed on
Commit
3c8598e
·
1 Parent(s): 135b99f
Files changed (1) hide show
  1. main.py +45 -18
main.py CHANGED
@@ -11,26 +11,40 @@ from dist import levenshtein_with_wildcard, print_match_summary
11
 
12
  description = frontmatter.load("README.md").content
13
 
 
14
  def trim(str, n):
15
  return "\n".join(str.splitlines()[n:])
16
 
 
17
  def trim_objdump(str):
18
  return trim(str, 7)
19
 
 
20
  def disassemble_bytes(byte_data, architecture, options):
21
  with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as temp_bin_file:
22
  temp_bin_file.write(byte_data)
23
  temp_bin_file_name = temp_bin_file.name
24
 
25
  disassembly = subprocess.run(
26
- ["objdump", "-D", "-b", "binary", "-m", architecture, "-M", options, temp_bin_file_name],
 
 
 
 
 
 
 
 
 
 
27
  capture_output=True,
28
- text=True
29
  ).stdout
30
  disassembly = trim_objdump(disassembly)
31
 
32
  return disassembly
33
 
 
34
  def compile(compiler, flags, source):
35
  # Create a temporary file for the C source code
36
  with tempfile.NamedTemporaryFile(suffix=".c", delete=False) as temp_c_file:
@@ -71,9 +85,7 @@ def compile(compiler, flags, source):
71
 
72
  # Disassemble the object file
73
  disassembly = subprocess.run(
74
- ["objdump", "-dr", temp_o_file_name],
75
- capture_output=True,
76
- text=True
77
  ).stdout
78
  disassembly = trim_objdump(disassembly)
79
 
@@ -86,23 +98,27 @@ def compile(compiler, flags, source):
86
  # relocs = trim(relocs, 3)
87
 
88
  json_relocs = subprocess.run(
89
- ["llvm-readobj-19", "--elf-output-style=JSON", "--relocations", temp_o_file_name],
 
 
 
 
 
90
  capture_output=True,
91
  text=True,
92
  ).stdout
93
  json_relocs = json.loads(json_relocs)
94
  json_relocs = json_relocs[0]["Relocations"]
95
- json_relocs = [r["Relocation"] for d in json_relocs for r in d['Relocs']]
96
  # Filter out .text
97
  json_relocs = [r for r in json_relocs if r["Symbol"]["Name"] != ".text"]
98
 
99
-
100
-
101
  if result.returncode == 0:
102
  return json_relocs, compiled_bytes, compile_output, disassembly
103
  else:
104
  return None, None, compile_output, disassembly
105
 
 
106
  def _reloc_type2size(s):
107
  match s:
108
  case "R_X86_64_PC32":
@@ -112,20 +128,31 @@ def _reloc_type2size(s):
112
  case _:
113
  assert False, f"Unknown reloc {s}"
114
 
 
115
  def _compute_relocs_byte_range(json_relocs):
116
- relocs_byte_range = [range(r["Offset"], r["Offset"] + _reloc_type2size(r["Type"]["Name"])) for r in json_relocs]
117
- # Flatten relocs_byte_range
118
- relocs_byte_range = [i for r in relocs_byte_range for i in r]
119
- return relocs_byte_range
 
 
 
 
120
 
121
  def predict(target_bytes, source, compiler, flags, disasm_arch, disasm_options):
122
  target_bytes = bytes.fromhex(target_bytes)
123
- compiled_relocs, compiled_bytes, compile_output, compiled_disassembly = compile(compiler, flags, source)
 
 
124
  target_disassembly = disassemble_bytes(target_bytes, disasm_arch, disasm_options)
125
 
126
  if compiled_bytes is not None:
127
 
128
- reloc_edit_distance, reloc_operations = print_match_summary(target_bytes, compiled_bytes, wildcard_offsets_seq2=_compute_relocs_byte_range(compiled_relocs))
 
 
 
 
129
  print(f"reloc_edit_distance: {reloc_edit_distance}")
130
  print(f"reloc operations: {reloc_operations}")
131
 
@@ -138,7 +165,7 @@ def predict(target_bytes, source, compiler, flags, disasm_arch, disasm_options):
138
  compile_output,
139
  compiled_disassembly,
140
  compiled_relocs,
141
- target_disassembly
142
  )
143
  else:
144
  return (
@@ -150,7 +177,7 @@ def predict(target_bytes, source, compiler, flags, disasm_arch, disasm_options):
150
  compile_output,
151
  compiled_disassembly,
152
  compiled_relocs,
153
- target_disassembly
154
  )
155
 
156
 
@@ -172,7 +199,7 @@ def run():
172
  gr.Textbox(label="Compiler", value="g++"),
173
  gr.Textbox(label="Compiler Flags", value="-O2"),
174
  gr.Textbox(label="Architecture (objdump -m)", value="i386"),
175
- gr.Textbox(label="Disassembler options (objdump -M)", value="x86-64")
176
  ],
177
  outputs=[
178
  gr.Textbox(label="Compiled bytes"),
 
11
 
12
  description = frontmatter.load("README.md").content
13
 
14
+
15
  def trim(str, n):
16
  return "\n".join(str.splitlines()[n:])
17
 
18
+
19
  def trim_objdump(str):
20
  return trim(str, 7)
21
 
22
+
23
  def disassemble_bytes(byte_data, architecture, options):
24
  with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as temp_bin_file:
25
  temp_bin_file.write(byte_data)
26
  temp_bin_file_name = temp_bin_file.name
27
 
28
  disassembly = subprocess.run(
29
+ [
30
+ "objdump",
31
+ "-D",
32
+ "-b",
33
+ "binary",
34
+ "-m",
35
+ architecture,
36
+ "-M",
37
+ options,
38
+ temp_bin_file_name,
39
+ ],
40
  capture_output=True,
41
+ text=True,
42
  ).stdout
43
  disassembly = trim_objdump(disassembly)
44
 
45
  return disassembly
46
 
47
+
48
  def compile(compiler, flags, source):
49
  # Create a temporary file for the C source code
50
  with tempfile.NamedTemporaryFile(suffix=".c", delete=False) as temp_c_file:
 
85
 
86
  # Disassemble the object file
87
  disassembly = subprocess.run(
88
+ ["objdump", "-dr", temp_o_file_name], capture_output=True, text=True
 
 
89
  ).stdout
90
  disassembly = trim_objdump(disassembly)
91
 
 
98
  # relocs = trim(relocs, 3)
99
 
100
  json_relocs = subprocess.run(
101
+ [
102
+ "llvm-readobj-19",
103
+ "--elf-output-style=JSON",
104
+ "--relocations",
105
+ temp_o_file_name,
106
+ ],
107
  capture_output=True,
108
  text=True,
109
  ).stdout
110
  json_relocs = json.loads(json_relocs)
111
  json_relocs = json_relocs[0]["Relocations"]
112
+ json_relocs = [r["Relocation"] for d in json_relocs for r in d["Relocs"]]
113
  # Filter out .text
114
  json_relocs = [r for r in json_relocs if r["Symbol"]["Name"] != ".text"]
115
 
 
 
116
  if result.returncode == 0:
117
  return json_relocs, compiled_bytes, compile_output, disassembly
118
  else:
119
  return None, None, compile_output, disassembly
120
 
121
+
122
  def _reloc_type2size(s):
123
  match s:
124
  case "R_X86_64_PC32":
 
128
  case _:
129
  assert False, f"Unknown reloc {s}"
130
 
131
+
132
  def _compute_relocs_byte_range(json_relocs):
133
+ relocs_byte_range = [
134
+ range(r["Offset"], r["Offset"] + _reloc_type2size(r["Type"]["Name"]))
135
+ for r in json_relocs
136
+ ]
137
+ # Flatten relocs_byte_range
138
+ relocs_byte_range = [i for r in relocs_byte_range for i in r]
139
+ return relocs_byte_range
140
+
141
 
142
  def predict(target_bytes, source, compiler, flags, disasm_arch, disasm_options):
143
  target_bytes = bytes.fromhex(target_bytes)
144
+ compiled_relocs, compiled_bytes, compile_output, compiled_disassembly = compile(
145
+ compiler, flags, source
146
+ )
147
  target_disassembly = disassemble_bytes(target_bytes, disasm_arch, disasm_options)
148
 
149
  if compiled_bytes is not None:
150
 
151
+ reloc_edit_distance, reloc_operations = print_match_summary(
152
+ target_bytes,
153
+ compiled_bytes,
154
+ wildcard_offsets_seq2=_compute_relocs_byte_range(compiled_relocs),
155
+ )
156
  print(f"reloc_edit_distance: {reloc_edit_distance}")
157
  print(f"reloc operations: {reloc_operations}")
158
 
 
165
  compile_output,
166
  compiled_disassembly,
167
  compiled_relocs,
168
+ target_disassembly,
169
  )
170
  else:
171
  return (
 
177
  compile_output,
178
  compiled_disassembly,
179
  compiled_relocs,
180
+ target_disassembly,
181
  )
182
 
183
 
 
199
  gr.Textbox(label="Compiler", value="g++"),
200
  gr.Textbox(label="Compiler Flags", value="-O2"),
201
  gr.Textbox(label="Architecture (objdump -m)", value="i386"),
202
+ gr.Textbox(label="Disassembler options (objdump -M)", value="x86-64"),
203
  ],
204
  outputs=[
205
  gr.Textbox(label="Compiled bytes"),