Spaces:
Sleeping
Sleeping
Commit
·
8427d96
1
Parent(s):
dda3a0e
Adjust dist api
Browse files
dist.py
CHANGED
@@ -1,17 +1,22 @@
|
|
1 |
-
def
|
2 |
"""
|
3 |
-
Calculate the Levenshtein distance between two strings with support for wildcards.
|
4 |
|
5 |
Args:
|
6 |
str1 (str): The first string.
|
7 |
str2 (str): The second string.
|
8 |
-
|
|
|
9 |
verbose (bool, optional): If True, prints the DP matrix and explains the process.
|
10 |
|
11 |
Returns:
|
12 |
int: The Levenshtein distance between the two strings.
|
13 |
list: If verbose=True, also returns a list of operations performed.
|
14 |
"""
|
|
|
|
|
|
|
|
|
15 |
m, n = len(str1), len(str2)
|
16 |
|
17 |
# Create a matrix of size (m+1) x (n+1)
|
@@ -27,8 +32,12 @@ def levenshtein_with_wildcards(str1, str2, wildcard='?', verbose=False):
|
|
27 |
# Fill the dp matrix
|
28 |
for i in range(1, m + 1):
|
29 |
for j in range(1, n + 1):
|
30 |
-
#
|
31 |
-
|
|
|
|
|
|
|
|
|
32 |
dp[i][j] = dp[i - 1][j - 1] # No cost for wildcard matches
|
33 |
else:
|
34 |
cost = 0 if str1[i - 1] == str2[j - 1] else 1
|
@@ -39,12 +48,12 @@ def levenshtein_with_wildcards(str1, str2, wildcard='?', verbose=False):
|
|
39 |
)
|
40 |
|
41 |
if verbose:
|
42 |
-
operations = explain_match(str1, str2, dp,
|
43 |
return dp[m][n], operations
|
44 |
|
45 |
return dp[m][n]
|
46 |
|
47 |
-
def explain_match(str1, str2, dp,
|
48 |
"""
|
49 |
Traces the optimal alignment path and explains each step of the matching process.
|
50 |
|
@@ -52,7 +61,8 @@ def explain_match(str1, str2, dp, wildcard='?'):
|
|
52 |
str1 (str): The first string.
|
53 |
str2 (str): The second string.
|
54 |
dp (list): The dynamic programming matrix.
|
55 |
-
|
|
|
56 |
|
57 |
Returns:
|
58 |
list: A list of explanation strings for each operation performed.
|
@@ -96,74 +106,127 @@ def explain_match(str1, str2, dp, wildcard='?'):
|
|
96 |
|
97 |
# Diagonal move (match or substitution)
|
98 |
if curr_i > prev_i and curr_j > prev_j:
|
99 |
-
|
100 |
-
|
|
|
|
|
|
|
|
|
|
|
101 |
|
102 |
-
if
|
103 |
-
|
104 |
-
|
105 |
-
operations.append(f"Wildcard match:
|
|
|
|
|
106 |
elif char1 == char2:
|
107 |
-
operations.append(f"Match: '{char1}' matches '{char2}'")
|
108 |
else:
|
109 |
-
operations.append(f"Substitution: Replace '{char1}' with '{char2}'")
|
110 |
|
111 |
# Horizontal move (insertion)
|
112 |
elif curr_i == prev_i and curr_j > prev_j:
|
113 |
-
|
|
|
114 |
|
115 |
# Vertical move (deletion)
|
116 |
elif curr_i > prev_i and curr_j == prev_j:
|
117 |
-
|
|
|
118 |
|
119 |
return operations
|
120 |
|
121 |
-
def print_match_summary(str1, str2,
|
122 |
"""
|
123 |
-
Prints a summary of the match between two strings, highlighting wildcards.
|
124 |
|
125 |
Args:
|
126 |
str1 (str): The first string.
|
127 |
str2 (str): The second string.
|
128 |
-
|
|
|
129 |
"""
|
130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
|
132 |
-
print(f"Comparing '{
|
|
|
|
|
133 |
print(f"Edit distance: {distance}")
|
134 |
print("\nMatch process:")
|
135 |
|
136 |
for i, op in enumerate(operations):
|
137 |
print(f"Step {i+1}: {op}")
|
138 |
|
139 |
-
# Visual representation
|
140 |
-
alignment = []
|
141 |
i, j = 0, 0
|
142 |
aligned_str1 = ""
|
143 |
aligned_str2 = ""
|
144 |
match_indicators = ""
|
145 |
|
146 |
for op in operations:
|
147 |
-
if "
|
148 |
-
|
149 |
-
|
150 |
|
151 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
152 |
match_indicators += "*" # Wildcard match
|
153 |
-
elif "Match" in op:
|
154 |
match_indicators += "|" # Exact match
|
155 |
else:
|
156 |
match_indicators += "X" # Substitution
|
157 |
|
158 |
i += 1
|
159 |
j += 1
|
160 |
-
elif "Insertion" in op:
|
161 |
aligned_str1 += "-"
|
162 |
-
|
|
|
|
|
|
|
|
|
|
|
163 |
match_indicators += " "
|
164 |
j += 1
|
165 |
-
elif "Deletion" in op:
|
166 |
-
|
|
|
|
|
|
|
|
|
167 |
aligned_str2 += "-"
|
168 |
match_indicators += " "
|
169 |
i += 1
|
@@ -173,10 +236,10 @@ def print_match_summary(str1, str2, wildcard='?'):
|
|
173 |
print(match_indicators)
|
174 |
print(aligned_str2)
|
175 |
print("\nLegend:")
|
176 |
-
print("| = exact match, * = wildcard match, X = substitution, - = gap (insertion/deletion)")
|
177 |
|
178 |
# Summary of wildcard matches
|
179 |
-
wildcard_matches = [op for op in operations if "Wildcard" in op]
|
180 |
if wildcard_matches:
|
181 |
print("\nWildcard matches:")
|
182 |
for match in wildcard_matches:
|
@@ -186,14 +249,20 @@ def print_match_summary(str1, str2, wildcard='?'):
|
|
186 |
|
187 |
# Example usage
|
188 |
if __name__ == "__main__":
|
189 |
-
#
|
190 |
-
print_match_summary("hello", "hello")
|
191 |
-
|
192 |
-
|
193 |
-
print_match_summary("
|
194 |
-
|
195 |
-
|
196 |
-
print_match_summary("
|
197 |
-
|
198 |
-
|
199 |
-
print_match_summary("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def levenshtein_with_wildcard(str1, str2, wildcard_offsets_str1=None, wildcard_offsets_str2=None, verbose=False):
|
2 |
"""
|
3 |
+
Calculate the Levenshtein distance between two strings with support for wildcards at specific positions.
|
4 |
|
5 |
Args:
|
6 |
str1 (str): The first string.
|
7 |
str2 (str): The second string.
|
8 |
+
wildcard_offsets_str1 (iterable, optional): Indices in str1 that are wildcards. Defaults to None.
|
9 |
+
wildcard_offsets_str2 (iterable, optional): Indices in str2 that are wildcards. Defaults to None.
|
10 |
verbose (bool, optional): If True, prints the DP matrix and explains the process.
|
11 |
|
12 |
Returns:
|
13 |
int: The Levenshtein distance between the two strings.
|
14 |
list: If verbose=True, also returns a list of operations performed.
|
15 |
"""
|
16 |
+
# Initialize empty sets if None
|
17 |
+
wildcard_offsets_str1 = set(wildcard_offsets_str1 or [])
|
18 |
+
wildcard_offsets_str2 = set(wildcard_offsets_str2 or [])
|
19 |
+
|
20 |
m, n = len(str1), len(str2)
|
21 |
|
22 |
# Create a matrix of size (m+1) x (n+1)
|
|
|
32 |
# Fill the dp matrix
|
33 |
for i in range(1, m + 1):
|
34 |
for j in range(1, n + 1):
|
35 |
+
# Check if either position is a wildcard
|
36 |
+
is_str1_wildcard = (i - 1) in wildcard_offsets_str1
|
37 |
+
is_str2_wildcard = (j - 1) in wildcard_offsets_str2
|
38 |
+
|
39 |
+
# If either position is a wildcard, treat it as a match (cost = 0)
|
40 |
+
if is_str1_wildcard or is_str2_wildcard:
|
41 |
dp[i][j] = dp[i - 1][j - 1] # No cost for wildcard matches
|
42 |
else:
|
43 |
cost = 0 if str1[i - 1] == str2[j - 1] else 1
|
|
|
48 |
)
|
49 |
|
50 |
if verbose:
|
51 |
+
operations = explain_match(str1, str2, dp, wildcard_offsets_str1, wildcard_offsets_str2)
|
52 |
return dp[m][n], operations
|
53 |
|
54 |
return dp[m][n]
|
55 |
|
56 |
+
def explain_match(str1, str2, dp, wildcard_offsets_str1, wildcard_offsets_str2):
|
57 |
"""
|
58 |
Traces the optimal alignment path and explains each step of the matching process.
|
59 |
|
|
|
61 |
str1 (str): The first string.
|
62 |
str2 (str): The second string.
|
63 |
dp (list): The dynamic programming matrix.
|
64 |
+
wildcard_offsets_str1 (set): Indices in str1 that are wildcards.
|
65 |
+
wildcard_offsets_str2 (set): Indices in str2 that are wildcards.
|
66 |
|
67 |
Returns:
|
68 |
list: A list of explanation strings for each operation performed.
|
|
|
106 |
|
107 |
# Diagonal move (match or substitution)
|
108 |
if curr_i > prev_i and curr_j > prev_j:
|
109 |
+
char1_idx = curr_i-1
|
110 |
+
char2_idx = curr_j-1
|
111 |
+
char1 = str1[char1_idx]
|
112 |
+
char2 = str2[char2_idx]
|
113 |
+
|
114 |
+
is_str1_wildcard = char1_idx in wildcard_offsets_str1
|
115 |
+
is_str2_wildcard = char2_idx in wildcard_offsets_str2
|
116 |
|
117 |
+
if is_str1_wildcard and is_str2_wildcard:
|
118 |
+
operations.append(f"Double wildcard: Position {char1_idx} in str1 and position {char2_idx} in str2 are both wildcards")
|
119 |
+
elif is_str1_wildcard:
|
120 |
+
operations.append(f"Wildcard match: Position {char1_idx} in str1 is a wildcard, matches '{char2}' at position {char2_idx} in str2")
|
121 |
+
elif is_str2_wildcard:
|
122 |
+
operations.append(f"Wildcard match: Position {char2_idx} in str2 is a wildcard, matches '{char1}' at position {char1_idx} in str1")
|
123 |
elif char1 == char2:
|
124 |
+
operations.append(f"Match: '{char1}' at position {char1_idx} matches '{char2}' at position {char2_idx}")
|
125 |
else:
|
126 |
+
operations.append(f"Substitution: Replace '{char1}' at position {char1_idx} with '{char2}' at position {char2_idx}")
|
127 |
|
128 |
# Horizontal move (insertion)
|
129 |
elif curr_i == prev_i and curr_j > prev_j:
|
130 |
+
char_idx = curr_j-1
|
131 |
+
operations.append(f"Insertion: Insert '{str2[char_idx]}' at position {char_idx} in str2")
|
132 |
|
133 |
# Vertical move (deletion)
|
134 |
elif curr_i > prev_i and curr_j == prev_j:
|
135 |
+
char_idx = curr_i-1
|
136 |
+
operations.append(f"Deletion: Delete '{str1[char_idx]}' at position {char_idx} in str1")
|
137 |
|
138 |
return operations
|
139 |
|
140 |
+
def print_match_summary(str1, str2, wildcard_offsets_str1=None, wildcard_offsets_str2=None):
|
141 |
"""
|
142 |
+
Prints a summary of the match between two strings, highlighting wildcards by their offsets.
|
143 |
|
144 |
Args:
|
145 |
str1 (str): The first string.
|
146 |
str2 (str): The second string.
|
147 |
+
wildcard_offsets_str1 (iterable, optional): Indices in str1 that are wildcards. Defaults to None.
|
148 |
+
wildcard_offsets_str2 (iterable, optional): Indices in str2 that are wildcards. Defaults to None.
|
149 |
"""
|
150 |
+
# Initialize empty sets if None
|
151 |
+
wildcard_offsets_str1 = set(wildcard_offsets_str1 or [])
|
152 |
+
wildcard_offsets_str2 = set(wildcard_offsets_str2 or [])
|
153 |
+
|
154 |
+
distance, operations = levenshtein_with_wildcard(
|
155 |
+
str1, str2, wildcard_offsets_str1, wildcard_offsets_str2, verbose=True
|
156 |
+
)
|
157 |
+
|
158 |
+
# Create visual representations of the strings with wildcard markers
|
159 |
+
str1_visual = ""
|
160 |
+
for i, char in enumerate(str1):
|
161 |
+
if i in wildcard_offsets_str1:
|
162 |
+
str1_visual += f"[{char}]" # Mark wildcards with brackets
|
163 |
+
else:
|
164 |
+
str1_visual += char
|
165 |
+
|
166 |
+
str2_visual = ""
|
167 |
+
for i, char in enumerate(str2):
|
168 |
+
if i in wildcard_offsets_str2:
|
169 |
+
str2_visual += f"[{char}]" # Mark wildcards with brackets
|
170 |
+
else:
|
171 |
+
str2_visual += char
|
172 |
|
173 |
+
print(f"Comparing '{str1_visual}' and '{str2_visual}'")
|
174 |
+
print(f"Wildcards in str1: {sorted(wildcard_offsets_str1)}")
|
175 |
+
print(f"Wildcards in str2: {sorted(wildcard_offsets_str2)}")
|
176 |
print(f"Edit distance: {distance}")
|
177 |
print("\nMatch process:")
|
178 |
|
179 |
for i, op in enumerate(operations):
|
180 |
print(f"Step {i+1}: {op}")
|
181 |
|
182 |
+
# Visual representation of the alignment
|
|
|
183 |
i, j = 0, 0
|
184 |
aligned_str1 = ""
|
185 |
aligned_str2 = ""
|
186 |
match_indicators = ""
|
187 |
|
188 |
for op in operations:
|
189 |
+
if "Match:" in op or "Substitution:" in op or "Wildcard match:" in op or "Double wildcard:" in op:
|
190 |
+
is_str1_wildcard = i in wildcard_offsets_str1
|
191 |
+
is_str2_wildcard = j in wildcard_offsets_str2
|
192 |
|
193 |
+
# Add brackets around wildcards
|
194 |
+
if is_str1_wildcard:
|
195 |
+
aligned_str1 += f"[{str1[i]}]"
|
196 |
+
else:
|
197 |
+
aligned_str1 += str1[i]
|
198 |
+
|
199 |
+
if is_str2_wildcard:
|
200 |
+
aligned_str2 += f"[{str2[j]}]"
|
201 |
+
else:
|
202 |
+
aligned_str2 += str2[j]
|
203 |
+
|
204 |
+
# Determine match indicator
|
205 |
+
if "Wildcard match:" in op or "Double wildcard:" in op:
|
206 |
match_indicators += "*" # Wildcard match
|
207 |
+
elif "Match:" in op:
|
208 |
match_indicators += "|" # Exact match
|
209 |
else:
|
210 |
match_indicators += "X" # Substitution
|
211 |
|
212 |
i += 1
|
213 |
j += 1
|
214 |
+
elif "Insertion:" in op:
|
215 |
aligned_str1 += "-"
|
216 |
+
|
217 |
+
if j in wildcard_offsets_str2:
|
218 |
+
aligned_str2 += f"[{str2[j]}]"
|
219 |
+
else:
|
220 |
+
aligned_str2 += str2[j]
|
221 |
+
|
222 |
match_indicators += " "
|
223 |
j += 1
|
224 |
+
elif "Deletion:" in op:
|
225 |
+
if i in wildcard_offsets_str1:
|
226 |
+
aligned_str1 += f"[{str1[i]}]"
|
227 |
+
else:
|
228 |
+
aligned_str1 += str1[i]
|
229 |
+
|
230 |
aligned_str2 += "-"
|
231 |
match_indicators += " "
|
232 |
i += 1
|
|
|
236 |
print(match_indicators)
|
237 |
print(aligned_str2)
|
238 |
print("\nLegend:")
|
239 |
+
print("| = exact match, * = wildcard match, X = substitution, - = gap (insertion/deletion), [c] = wildcard position")
|
240 |
|
241 |
# Summary of wildcard matches
|
242 |
+
wildcard_matches = [op for op in operations if "Wildcard match:" in op or "Double wildcard:" in op]
|
243 |
if wildcard_matches:
|
244 |
print("\nWildcard matches:")
|
245 |
for match in wildcard_matches:
|
|
|
249 |
|
250 |
# Example usage
|
251 |
if __name__ == "__main__":
|
252 |
+
# Example 1: "hello" vs "hello" with no wildcards
|
253 |
+
print_match_summary("hello", "hello")
|
254 |
+
|
255 |
+
# Example 2: "hello" vs "hallo" with no wildcards - expect distance of 1
|
256 |
+
print_match_summary("hello", "hallo")
|
257 |
+
|
258 |
+
# Example 3: "hello" with 3rd position (index 2) as wildcard vs "hallo" - expect distance of 1 (the mismatch is at index 1, which is not a wildcard)
|
259 |
+
print_match_summary("hello", "hallo", wildcard_offsets_str1=[2])
|
260 |
+
|
261 |
+
# Example 4: "hello" vs "hillo" with 2nd position (index 1) as wildcard in str2 - expect distance of 0
|
262 |
+
print_match_summary("hello", "hillo", wildcard_offsets_str2=[1])
|
263 |
+
|
264 |
+
# Example 5: Multiple wildcards in str1
|
265 |
+
print_match_summary("hello", "haxyz", wildcard_offsets_str1=[2, 3, 4])
|
266 |
+
|
267 |
+
# Example 6: Wildcards in both strings at different positions
|
268 |
+
print_match_summary("hello", "howdy", wildcard_offsets_str1=[2], wildcard_offsets_str2=[3, 4])
|
main.py
CHANGED
@@ -106,7 +106,12 @@ def compile(compiler, flags, source):
|
|
106 |
assert False, f"Unknown reloc {s}"
|
107 |
|
108 |
relocs_byte_range = [range(r["Offset"], r["Offset"] + reloc_type2size(r["Type"]["Name"])) for r in json_relocs]
|
|
|
|
|
|
|
|
|
109 |
print(f"relocs: {relocs_byte_range}")
|
|
|
110 |
|
111 |
if result.returncode == 0:
|
112 |
return json_relocs, compiled_bytes, compile_output, disassembly
|
|
|
106 |
assert False, f"Unknown reloc {s}"
|
107 |
|
108 |
relocs_byte_range = [range(r["Offset"], r["Offset"] + reloc_type2size(r["Type"]["Name"])) for r in json_relocs]
|
109 |
+
# Flatten relocs_byte_range
|
110 |
+
relocs_byte_range = [i for r in relocs_byte_range for i in r]
|
111 |
+
|
112 |
+
|
113 |
print(f"relocs: {relocs_byte_range}")
|
114 |
+
print(print_match_summary(b"lol", relocs_byte_range, wildcard_offsets_str2=relocs_byte_range))
|
115 |
|
116 |
if result.returncode == 0:
|
117 |
return json_relocs, compiled_bytes, compile_output, disassembly
|