ejschwartz committed on
Commit
8427d96
·
1 Parent(s): dda3a0e

Adjust dist api

Browse files
Files changed (2) hide show
  1. dist.py +116 -47
  2. main.py +5 -0
dist.py CHANGED
@@ -1,17 +1,22 @@
1
- def levenshtein_with_wildcards(str1, str2, wildcard='?', verbose=False):
2
  """
3
- Calculate the Levenshtein distance between two strings with support for wildcards.
4
 
5
  Args:
6
  str1 (str): The first string.
7
  str2 (str): The second string.
8
- wildcard (str, optional): The wildcard character. Defaults to '?'.
 
9
  verbose (bool, optional): If True, prints the DP matrix and explains the process.
10
 
11
  Returns:
12
  int: The Levenshtein distance between the two strings.
13
  list: If verbose=True, also returns a list of operations performed.
14
  """
 
 
 
 
15
  m, n = len(str1), len(str2)
16
 
17
  # Create a matrix of size (m+1) x (n+1)
@@ -27,8 +32,12 @@ def levenshtein_with_wildcards(str1, str2, wildcard='?', verbose=False):
27
  # Fill the dp matrix
28
  for i in range(1, m + 1):
29
  for j in range(1, n + 1):
30
- # If either character is a wildcard, treat it as a match (cost = 0)
31
- if str1[i - 1] == wildcard or str2[j - 1] == wildcard:
 
 
 
 
32
  dp[i][j] = dp[i - 1][j - 1] # No cost for wildcard matches
33
  else:
34
  cost = 0 if str1[i - 1] == str2[j - 1] else 1
@@ -39,12 +48,12 @@ def levenshtein_with_wildcards(str1, str2, wildcard='?', verbose=False):
39
  )
40
 
41
  if verbose:
42
- operations = explain_match(str1, str2, dp, wildcard)
43
  return dp[m][n], operations
44
 
45
  return dp[m][n]
46
 
47
- def explain_match(str1, str2, dp, wildcard='?'):
48
  """
49
  Traces the optimal alignment path and explains each step of the matching process.
50
 
@@ -52,7 +61,8 @@ def explain_match(str1, str2, dp, wildcard='?'):
52
  str1 (str): The first string.
53
  str2 (str): The second string.
54
  dp (list): The dynamic programming matrix.
55
- wildcard (str, optional): The wildcard character. Defaults to '?'.
 
56
 
57
  Returns:
58
  list: A list of explanation strings for each operation performed.
@@ -96,74 +106,127 @@ def explain_match(str1, str2, dp, wildcard='?'):
96
 
97
  # Diagonal move (match or substitution)
98
  if curr_i > prev_i and curr_j > prev_j:
99
- char1 = str1[curr_i-1]
100
- char2 = str2[curr_j-1]
 
 
 
 
 
101
 
102
- if char1 == wildcard or char2 == wildcard:
103
- wildcard_char = char1 if char1 == wildcard else char2
104
- match_char = char2 if char1 == wildcard else char1
105
- operations.append(f"Wildcard match: '{wildcard_char}' matches any character, here '{match_char}'")
 
 
106
  elif char1 == char2:
107
- operations.append(f"Match: '{char1}' matches '{char2}'")
108
  else:
109
- operations.append(f"Substitution: Replace '{char1}' with '{char2}'")
110
 
111
  # Horizontal move (insertion)
112
  elif curr_i == prev_i and curr_j > prev_j:
113
- operations.append(f"Insertion: Insert '{str2[curr_j-1]}'")
 
114
 
115
  # Vertical move (deletion)
116
  elif curr_i > prev_i and curr_j == prev_j:
117
- operations.append(f"Deletion: Delete '{str1[curr_i-1]}'")
 
118
 
119
  return operations
120
 
121
- def print_match_summary(str1, str2, wildcard='?'):
122
  """
123
- Prints a summary of the match between two strings, highlighting wildcards.
124
 
125
  Args:
126
  str1 (str): The first string.
127
  str2 (str): The second string.
128
- wildcard (str, optional): The wildcard character. Defaults to '?'.
 
129
  """
130
- distance, operations = levenshtein_with_wildcards(str1, str2, wildcard, verbose=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
- print(f"Comparing '{str1}' and '{str2}' (wildcard: '{wildcard}')")
 
 
133
  print(f"Edit distance: {distance}")
134
  print("\nMatch process:")
135
 
136
  for i, op in enumerate(operations):
137
  print(f"Step {i+1}: {op}")
138
 
139
- # Visual representation
140
- alignment = []
141
  i, j = 0, 0
142
  aligned_str1 = ""
143
  aligned_str2 = ""
144
  match_indicators = ""
145
 
146
  for op in operations:
147
- if "match" in op or "Match" in op or "Substitution" in op:
148
- aligned_str1 += str1[i]
149
- aligned_str2 += str2[j]
150
 
151
- if "Wildcard" in op:
 
 
 
 
 
 
 
 
 
 
 
 
152
  match_indicators += "*" # Wildcard match
153
- elif "Match" in op:
154
  match_indicators += "|" # Exact match
155
  else:
156
  match_indicators += "X" # Substitution
157
 
158
  i += 1
159
  j += 1
160
- elif "Insertion" in op:
161
  aligned_str1 += "-"
162
- aligned_str2 += str2[j]
 
 
 
 
 
163
  match_indicators += " "
164
  j += 1
165
- elif "Deletion" in op:
166
- aligned_str1 += str1[i]
 
 
 
 
167
  aligned_str2 += "-"
168
  match_indicators += " "
169
  i += 1
@@ -173,10 +236,10 @@ def print_match_summary(str1, str2, wildcard='?'):
173
  print(match_indicators)
174
  print(aligned_str2)
175
  print("\nLegend:")
176
- print("| = exact match, * = wildcard match, X = substitution, - = gap (insertion/deletion)")
177
 
178
  # Summary of wildcard matches
179
- wildcard_matches = [op for op in operations if "Wildcard" in op]
180
  if wildcard_matches:
181
  print("\nWildcard matches:")
182
  for match in wildcard_matches:
@@ -186,14 +249,20 @@ def print_match_summary(str1, str2, wildcard='?'):
186
 
187
  # Example usage
188
  if __name__ == "__main__":
189
- # Basic examples
190
- print_match_summary("hello", "hello") # 0 (identical strings)
191
- print_match_summary("hello", "hallo") # 1 (one substitution)
192
- print_match_summary("he?lo", "hello") # 0 (wildcard matches 'l')
193
- print_match_summary("he?lo", "hallo") # 0 (wildcard matches 'a')
194
- print_match_summary("h?llo", "hello") # 0 (wildcard matches 'e')
195
- print_match_summary("h?llo", "hillo") # 0 (wildcard matches 'i')
196
- print_match_summary("c?t", "cat") # 0 (wildcard matches 'a')
197
- print_match_summary("c?t", "cut") # 0 (wildcard matches 'u')
198
- print_match_summary("w?rd", "word") # 0 (wildcard matches 'o')
199
- print_match_summary("d?g", "dog") # 0 (wildcard matches 'o')
 
 
 
 
 
 
 
1
+ def levenshtein_with_wildcard(str1, str2, wildcard_offsets_str1=None, wildcard_offsets_str2=None, verbose=False):
2
  """
3
+ Calculate the Levenshtein distance between two strings with support for wildcards at specific positions.
4
 
5
  Args:
6
  str1 (str): The first string.
7
  str2 (str): The second string.
8
+ wildcard_offsets_str1 (iterable, optional): Indices in str1 that are wildcards. Defaults to None.
9
+ wildcard_offsets_str2 (iterable, optional): Indices in str2 that are wildcards. Defaults to None.
10
  verbose (bool, optional): If True, prints the DP matrix and explains the process.
11
 
12
  Returns:
13
  int: The Levenshtein distance between the two strings.
14
  list: If verbose=True, also returns a list of operations performed.
15
  """
16
+ # Initialize empty sets if None
17
+ wildcard_offsets_str1 = set(wildcard_offsets_str1 or [])
18
+ wildcard_offsets_str2 = set(wildcard_offsets_str2 or [])
19
+
20
  m, n = len(str1), len(str2)
21
 
22
  # Create a matrix of size (m+1) x (n+1)
 
32
  # Fill the dp matrix
33
  for i in range(1, m + 1):
34
  for j in range(1, n + 1):
35
+ # Check if either position is a wildcard
36
+ is_str1_wildcard = (i - 1) in wildcard_offsets_str1
37
+ is_str2_wildcard = (j - 1) in wildcard_offsets_str2
38
+
39
+ # If either position is a wildcard, treat it as a match (cost = 0)
40
+ if is_str1_wildcard or is_str2_wildcard:
41
  dp[i][j] = dp[i - 1][j - 1] # No cost for wildcard matches
42
  else:
43
  cost = 0 if str1[i - 1] == str2[j - 1] else 1
 
48
  )
49
 
50
  if verbose:
51
+ operations = explain_match(str1, str2, dp, wildcard_offsets_str1, wildcard_offsets_str2)
52
  return dp[m][n], operations
53
 
54
  return dp[m][n]
55
 
56
+ def explain_match(str1, str2, dp, wildcard_offsets_str1, wildcard_offsets_str2):
57
  """
58
  Traces the optimal alignment path and explains each step of the matching process.
59
 
 
61
  str1 (str): The first string.
62
  str2 (str): The second string.
63
  dp (list): The dynamic programming matrix.
64
+ wildcard_offsets_str1 (set): Indices in str1 that are wildcards.
65
+ wildcard_offsets_str2 (set): Indices in str2 that are wildcards.
66
 
67
  Returns:
68
  list: A list of explanation strings for each operation performed.
 
106
 
107
  # Diagonal move (match or substitution)
108
  if curr_i > prev_i and curr_j > prev_j:
109
+ char1_idx = curr_i-1
110
+ char2_idx = curr_j-1
111
+ char1 = str1[char1_idx]
112
+ char2 = str2[char2_idx]
113
+
114
+ is_str1_wildcard = char1_idx in wildcard_offsets_str1
115
+ is_str2_wildcard = char2_idx in wildcard_offsets_str2
116
 
117
+ if is_str1_wildcard and is_str2_wildcard:
118
+ operations.append(f"Double wildcard: Position {char1_idx} in str1 and position {char2_idx} in str2 are both wildcards")
119
+ elif is_str1_wildcard:
120
+ operations.append(f"Wildcard match: Position {char1_idx} in str1 is a wildcard, matches '{char2}' at position {char2_idx} in str2")
121
+ elif is_str2_wildcard:
122
+ operations.append(f"Wildcard match: Position {char2_idx} in str2 is a wildcard, matches '{char1}' at position {char1_idx} in str1")
123
  elif char1 == char2:
124
+ operations.append(f"Match: '{char1}' at position {char1_idx} matches '{char2}' at position {char2_idx}")
125
  else:
126
+ operations.append(f"Substitution: Replace '{char1}' at position {char1_idx} with '{char2}' at position {char2_idx}")
127
 
128
  # Horizontal move (insertion)
129
  elif curr_i == prev_i and curr_j > prev_j:
130
+ char_idx = curr_j-1
131
+ operations.append(f"Insertion: Insert '{str2[char_idx]}' at position {char_idx} in str2")
132
 
133
  # Vertical move (deletion)
134
  elif curr_i > prev_i and curr_j == prev_j:
135
+ char_idx = curr_i-1
136
+ operations.append(f"Deletion: Delete '{str1[char_idx]}' at position {char_idx} in str1")
137
 
138
  return operations
139
 
140
+ def print_match_summary(str1, str2, wildcard_offsets_str1=None, wildcard_offsets_str2=None):
141
  """
142
+ Prints a summary of the match between two strings, highlighting wildcards by their offsets.
143
 
144
  Args:
145
  str1 (str): The first string.
146
  str2 (str): The second string.
147
+ wildcard_offsets_str1 (iterable, optional): Indices in str1 that are wildcards. Defaults to None.
148
+ wildcard_offsets_str2 (iterable, optional): Indices in str2 that are wildcards. Defaults to None.
149
  """
150
+ # Initialize empty sets if None
151
+ wildcard_offsets_str1 = set(wildcard_offsets_str1 or [])
152
+ wildcard_offsets_str2 = set(wildcard_offsets_str2 or [])
153
+
154
+ distance, operations = levenshtein_with_wildcard(
155
+ str1, str2, wildcard_offsets_str1, wildcard_offsets_str2, verbose=True
156
+ )
157
+
158
+ # Create visual representations of the strings with wildcard markers
159
+ str1_visual = ""
160
+ for i, char in enumerate(str1):
161
+ if i in wildcard_offsets_str1:
162
+ str1_visual += f"[{char}]" # Mark wildcards with brackets
163
+ else:
164
+ str1_visual += char
165
+
166
+ str2_visual = ""
167
+ for i, char in enumerate(str2):
168
+ if i in wildcard_offsets_str2:
169
+ str2_visual += f"[{char}]" # Mark wildcards with brackets
170
+ else:
171
+ str2_visual += char
172
 
173
+ print(f"Comparing '{str1_visual}' and '{str2_visual}'")
174
+ print(f"Wildcards in str1: {sorted(wildcard_offsets_str1)}")
175
+ print(f"Wildcards in str2: {sorted(wildcard_offsets_str2)}")
176
  print(f"Edit distance: {distance}")
177
  print("\nMatch process:")
178
 
179
  for i, op in enumerate(operations):
180
  print(f"Step {i+1}: {op}")
181
 
182
+ # Visual representation of the alignment
 
183
  i, j = 0, 0
184
  aligned_str1 = ""
185
  aligned_str2 = ""
186
  match_indicators = ""
187
 
188
  for op in operations:
189
+ if "Match:" in op or "Substitution:" in op or "Wildcard match:" in op or "Double wildcard:" in op:
190
+ is_str1_wildcard = i in wildcard_offsets_str1
191
+ is_str2_wildcard = j in wildcard_offsets_str2
192
 
193
+ # Add brackets around wildcards
194
+ if is_str1_wildcard:
195
+ aligned_str1 += f"[{str1[i]}]"
196
+ else:
197
+ aligned_str1 += str1[i]
198
+
199
+ if is_str2_wildcard:
200
+ aligned_str2 += f"[{str2[j]}]"
201
+ else:
202
+ aligned_str2 += str2[j]
203
+
204
+ # Determine match indicator
205
+ if "Wildcard match:" in op or "Double wildcard:" in op:
206
  match_indicators += "*" # Wildcard match
207
+ elif "Match:" in op:
208
  match_indicators += "|" # Exact match
209
  else:
210
  match_indicators += "X" # Substitution
211
 
212
  i += 1
213
  j += 1
214
+ elif "Insertion:" in op:
215
  aligned_str1 += "-"
216
+
217
+ if j in wildcard_offsets_str2:
218
+ aligned_str2 += f"[{str2[j]}]"
219
+ else:
220
+ aligned_str2 += str2[j]
221
+
222
  match_indicators += " "
223
  j += 1
224
+ elif "Deletion:" in op:
225
+ if i in wildcard_offsets_str1:
226
+ aligned_str1 += f"[{str1[i]}]"
227
+ else:
228
+ aligned_str1 += str1[i]
229
+
230
  aligned_str2 += "-"
231
  match_indicators += " "
232
  i += 1
 
236
  print(match_indicators)
237
  print(aligned_str2)
238
  print("\nLegend:")
239
+ print("| = exact match, * = wildcard match, X = substitution, - = gap (insertion/deletion), [c] = wildcard position")
240
 
241
  # Summary of wildcard matches
242
+ wildcard_matches = [op for op in operations if "Wildcard match:" in op or "Double wildcard:" in op]
243
  if wildcard_matches:
244
  print("\nWildcard matches:")
245
  for match in wildcard_matches:
 
249
 
250
  # Example usage
251
  if __name__ == "__main__":
252
+ # Example 1: "hello" vs "hello" with no wildcards
253
+ print_match_summary("hello", "hello")
254
+
255
+ # Example 2: "hello" vs "hallo" with no wildcards - expect distance of 1
256
+ print_match_summary("hello", "hallo")
257
+
258
+ # Example 3: "hello" with 3rd position (index 2) as wildcard vs "hallo" - expect distance of 1 (the 'e'/'a' mismatch at index 1 is not covered by the wildcard)
259
+ print_match_summary("hello", "hallo", wildcard_offsets_str1=[2])
260
+
261
+ # Example 4: "hello" vs "hillo" with 2nd position (index 1) as wildcard in str2 - expect distance of 0
262
+ print_match_summary("hello", "hillo", wildcard_offsets_str2=[1])
263
+
264
+ # Example 5: Multiple wildcards in str1
265
+ print_match_summary("hello", "haxyz", wildcard_offsets_str1=[2, 3, 4])
266
+
267
+ # Example 6: Wildcards in both strings at different positions
268
+ print_match_summary("hello", "howdy", wildcard_offsets_str1=[2], wildcard_offsets_str2=[3, 4])
main.py CHANGED
@@ -106,7 +106,12 @@ def compile(compiler, flags, source):
106
  assert False, f"Unknown reloc {s}"
107
 
108
  relocs_byte_range = [range(r["Offset"], r["Offset"] + reloc_type2size(r["Type"]["Name"])) for r in json_relocs]
 
 
 
 
109
  print(f"relocs: {relocs_byte_range}")
 
110
 
111
  if result.returncode == 0:
112
  return json_relocs, compiled_bytes, compile_output, disassembly
 
106
  assert False, f"Unknown reloc {s}"
107
 
108
  relocs_byte_range = [range(r["Offset"], r["Offset"] + reloc_type2size(r["Type"]["Name"])) for r in json_relocs]
109
+ # Flatten relocs_byte_range
110
+ relocs_byte_range = [i for r in relocs_byte_range for i in r]
111
+
112
+
113
  print(f"relocs: {relocs_byte_range}")
114
+ print(print_match_summary(b"lol", relocs_byte_range, wildcard_offsets_str2=relocs_byte_range))
115
 
116
  if result.returncode == 0:
117
  return json_relocs, compiled_bytes, compile_output, disassembly