decompilation-metrics

Sleeping

App Files Files Community

ejschwartz committed 30 days ago

Commit

8427d96

1 Parent(s): dda3a0e

Adjust dist api

Browse files

Files changed (2) hide show

dist.py +116 -47
main.py +5 -0

dist.py CHANGED Viewed

@@ -1,17 +1,22 @@
-def levenshtein_with_wildcards(str1, str2, wildcard='?', verbose=False):
     """
-    Calculate the Levenshtein distance between two strings with support for wildcards.
     Args:
         str1 (str): The first string.
         str2 (str): The second string.
-        wildcard (str, optional): The wildcard character. Defaults to '?'.
         verbose (bool, optional): If True, prints the DP matrix and explains the process.
     Returns:
         int: The Levenshtein distance between the two strings.
         list: If verbose=True, also returns a list of operations performed.
     """
     m, n = len(str1), len(str2)
     # Create a matrix of size (m+1) x (n+1)
@@ -27,8 +32,12 @@ def levenshtein_with_wildcards(str1, str2, wildcard='?', verbose=False):
     # Fill the dp matrix
     for i in range(1, m + 1):
         for j in range(1, n + 1):
-            # If either character is a wildcard, treat it as a match (cost = 0)
-            if str1[i - 1] == wildcard or str2[j - 1] == wildcard:
                 dp[i][j] = dp[i - 1][j - 1]  # No cost for wildcard matches
             else:
                 cost = 0 if str1[i - 1] == str2[j - 1] else 1
@@ -39,12 +48,12 @@ def levenshtein_with_wildcards(str1, str2, wildcard='?', verbose=False):
                 )
     if verbose:
-        operations = explain_match(str1, str2, dp, wildcard)
         return dp[m][n], operations
     return dp[m][n]
-def explain_match(str1, str2, dp, wildcard='?'):
     """
     Traces the optimal alignment path and explains each step of the matching process.
@@ -52,7 +61,8 @@ def explain_match(str1, str2, dp, wildcard='?'):
         str1 (str): The first string.
         str2 (str): The second string.
         dp (list): The dynamic programming matrix.
-        wildcard (str, optional): The wildcard character. Defaults to '?'.
     Returns:
         list: A list of explanation strings for each operation performed.
@@ -96,74 +106,127 @@ def explain_match(str1, str2, dp, wildcard='?'):
         # Diagonal move (match or substitution)
         if curr_i > prev_i and curr_j > prev_j:
-            char1 = str1[curr_i-1]
-            char2 = str2[curr_j-1]
-            if char1 == wildcard or char2 == wildcard:
-                wildcard_char = char1 if char1 == wildcard else char2
-                match_char = char2 if char1 == wildcard else char1
-                operations.append(f"Wildcard match: '{wildcard_char}' matches any character, here '{match_char}'")
             elif char1 == char2:
-                operations.append(f"Match: '{char1}' matches '{char2}'")
             else:
-                operations.append(f"Substitution: Replace '{char1}' with '{char2}'")
         # Horizontal move (insertion)
         elif curr_i == prev_i and curr_j > prev_j:
-            operations.append(f"Insertion: Insert '{str2[curr_j-1]}'")
         # Vertical move (deletion)
         elif curr_i > prev_i and curr_j == prev_j:
-            operations.append(f"Deletion: Delete '{str1[curr_i-1]}'")
     return operations
-def print_match_summary(str1, str2, wildcard='?'):
     """
-    Prints a summary of the match between two strings, highlighting wildcards.
     Args:
         str1 (str): The first string.
         str2 (str): The second string.
-        wildcard (str, optional): The wildcard character. Defaults to '?'.
     """
-    distance, operations = levenshtein_with_wildcards(str1, str2, wildcard, verbose=True)
-    print(f"Comparing '{str1}' and '{str2}' (wildcard: '{wildcard}')")
     print(f"Edit distance: {distance}")
     print("\nMatch process:")
     for i, op in enumerate(operations):
         print(f"Step {i+1}: {op}")
-    # Visual representation
-    alignment = []
     i, j = 0, 0
     aligned_str1 = ""
     aligned_str2 = ""
     match_indicators = ""
     for op in operations:
-        if "match" in op or "Match" in op or "Substitution" in op:
-            aligned_str1 += str1[i]
-            aligned_str2 += str2[j]
-            if "Wildcard" in op:
                 match_indicators += "*"  # Wildcard match
-            elif "Match" in op:
                 match_indicators += "|"  # Exact match
             else:
                 match_indicators += "X"  # Substitution
             i += 1
             j += 1
-        elif "Insertion" in op:
             aligned_str1 += "-"
-            aligned_str2 += str2[j]
             match_indicators += " "
             j += 1
-        elif "Deletion" in op:
-            aligned_str1 += str1[i]
             aligned_str2 += "-"
             match_indicators += " "
             i += 1
@@ -173,10 +236,10 @@ def print_match_summary(str1, str2, wildcard='?'):
     print(match_indicators)
     print(aligned_str2)
     print("\nLegend:")
-    print("| = exact match, * = wildcard match, X = substitution, - = gap (insertion/deletion)")
     # Summary of wildcard matches
-    wildcard_matches = [op for op in operations if "Wildcard" in op]
     if wildcard_matches:
         print("\nWildcard matches:")
         for match in wildcard_matches:
@@ -186,14 +249,20 @@ def print_match_summary(str1, str2, wildcard='?'):
 # Example usage
 if __name__ == "__main__":
-    # Basic examples
-    print_match_summary("hello", "hello")       # 0 (identical strings)
-    print_match_summary("hello", "hallo")       # 1 (one substitution)
-    print_match_summary("he?lo", "hello")       # 0 (wildcard matches 'l')
-    print_match_summary("he?lo", "hallo")       # 0 (wildcard matches 'a')
-    print_match_summary("h?llo", "hello")       # 0 (wildcard matches 'e')
-    print_match_summary("h?llo", "hillo")       # 0 (wildcard matches 'i')
-    print_match_summary("c?t", "cat")           # 0 (wildcard matches 'a')
-    print_match_summary("c?t", "cut")           # 0 (wildcard matches 'u')
-    print_match_summary("w?rd", "word")         # 0 (wildcard matches 'o')
-    print_match_summary("d?g", "dog")           # 0 (wildcard matches 'o')

+def levenshtein_with_wildcard(str1, str2, wildcard_offsets_str1=None, wildcard_offsets_str2=None, verbose=False):
     """
+    Calculate the Levenshtein distance between two strings with support for wildcards at specific positions.
     Args:
         str1 (str): The first string.
         str2 (str): The second string.
+        wildcard_offsets_str1 (iterable, optional): Indices in str1 that are wildcards. Defaults to None.
+        wildcard_offsets_str2 (iterable, optional): Indices in str2 that are wildcards. Defaults to None.
         verbose (bool, optional): If True, prints the DP matrix and explains the process.
     Returns:
         int: The Levenshtein distance between the two strings.
         list: If verbose=True, also returns a list of operations performed.
     """
+    # Initialize empty sets if None
+    wildcard_offsets_str1 = set(wildcard_offsets_str1 or [])
+    wildcard_offsets_str2 = set(wildcard_offsets_str2 or [])
     m, n = len(str1), len(str2)
     # Create a matrix of size (m+1) x (n+1)
     # Fill the dp matrix
     for i in range(1, m + 1):
         for j in range(1, n + 1):
+            # Check if either position is a wildcard
+            is_str1_wildcard = (i - 1) in wildcard_offsets_str1
+            is_str2_wildcard = (j - 1) in wildcard_offsets_str2
+            # If either position is a wildcard, treat it as a match (cost = 0)
+            if is_str1_wildcard or is_str2_wildcard:
                 dp[i][j] = dp[i - 1][j - 1]  # No cost for wildcard matches
             else:
                 cost = 0 if str1[i - 1] == str2[j - 1] else 1
                 )
     if verbose:
+        operations = explain_match(str1, str2, dp, wildcard_offsets_str1, wildcard_offsets_str2)
         return dp[m][n], operations
     return dp[m][n]
+def explain_match(str1, str2, dp, wildcard_offsets_str1, wildcard_offsets_str2):
     """
     Traces the optimal alignment path and explains each step of the matching process.
         str1 (str): The first string.
         str2 (str): The second string.
         dp (list): The dynamic programming matrix.
+        wildcard_offsets_str1 (set): Indices in str1 that are wildcards.
+        wildcard_offsets_str2 (set): Indices in str2 that are wildcards.
     Returns:
         list: A list of explanation strings for each operation performed.
         # Diagonal move (match or substitution)
         if curr_i > prev_i and curr_j > prev_j:
+            char1_idx = curr_i-1
+            char2_idx = curr_j-1
+            char1 = str1[char1_idx]
+            char2 = str2[char2_idx]
+            is_str1_wildcard = char1_idx in wildcard_offsets_str1
+            is_str2_wildcard = char2_idx in wildcard_offsets_str2
+            if is_str1_wildcard and is_str2_wildcard:
+                operations.append(f"Double wildcard: Position {char1_idx} in str1 and position {char2_idx} in str2 are both wildcards")
+            elif is_str1_wildcard:
+                operations.append(f"Wildcard match: Position {char1_idx} in str1 is a wildcard, matches '{char2}' at position {char2_idx} in str2")
+            elif is_str2_wildcard:
+                operations.append(f"Wildcard match: Position {char2_idx} in str2 is a wildcard, matches '{char1}' at position {char1_idx} in str1")
             elif char1 == char2:
+                operations.append(f"Match: '{char1}' at position {char1_idx} matches '{char2}' at position {char2_idx}")
             else:
+                operations.append(f"Substitution: Replace '{char1}' at position {char1_idx} with '{char2}' at position {char2_idx}")
         # Horizontal move (insertion)
         elif curr_i == prev_i and curr_j > prev_j:
+            char_idx = curr_j-1
+            operations.append(f"Insertion: Insert '{str2[char_idx]}' at position {char_idx} in str2")
         # Vertical move (deletion)
         elif curr_i > prev_i and curr_j == prev_j:
+            char_idx = curr_i-1
+            operations.append(f"Deletion: Delete '{str1[char_idx]}' at position {char_idx} in str1")
     return operations
+def print_match_summary(str1, str2, wildcard_offsets_str1=None, wildcard_offsets_str2=None):
     """
+    Prints a summary of the match between two strings, highlighting wildcards by their offsets.
     Args:
         str1 (str): The first string.
         str2 (str): The second string.
+        wildcard_offsets_str1 (iterable, optional): Indices in str1 that are wildcards. Defaults to None.
+        wildcard_offsets_str2 (iterable, optional): Indices in str2 that are wildcards. Defaults to None.
     """
+    # Normalize the wildcard offsets to sets (empty if None)
+    wildcard_offsets_str1 = set(wildcard_offsets_str1 or [])
+    wildcard_offsets_str2 = set(wildcard_offsets_str2 or [])
+    distance, operations = levenshtein_with_wildcard(
+        str1, str2, wildcard_offsets_str1, wildcard_offsets_str2, verbose=True
+    )
+    # Create visual representations of the strings with wildcard markers
+    str1_visual = ""
+    for i, char in enumerate(str1):
+        if i in wildcard_offsets_str1:
+            str1_visual += f"[{char}]"  # Mark wildcards with brackets
+        else:
+            str1_visual += char
+    str2_visual = ""
+    for i, char in enumerate(str2):
+        if i in wildcard_offsets_str2:
+            str2_visual += f"[{char}]"  # Mark wildcards with brackets
+        else:
+            str2_visual += char
+    print(f"Comparing '{str1_visual}' and '{str2_visual}'")
+    print(f"Wildcards in str1: {sorted(wildcard_offsets_str1)}")
+    print(f"Wildcards in str2: {sorted(wildcard_offsets_str2)}")
     print(f"Edit distance: {distance}")
     print("\nMatch process:")
     for i, op in enumerate(operations):
         print(f"Step {i+1}: {op}")
+    # Visual representation of the alignment
     i, j = 0, 0
     aligned_str1 = ""
     aligned_str2 = ""
     match_indicators = ""
     for op in operations:
+        if "Match:" in op or "Substitution:" in op or "Wildcard match:" in op or "Double wildcard:" in op:
+            is_str1_wildcard = i in wildcard_offsets_str1
+            is_str2_wildcard = j in wildcard_offsets_str2
+            # Add brackets around wildcards
+            if is_str1_wildcard:
+                aligned_str1 += f"[{str1[i]}]"
+            else:
+                aligned_str1 += str1[i]
+            if is_str2_wildcard:
+                aligned_str2 += f"[{str2[j]}]"
+            else:
+                aligned_str2 += str2[j]
+            # Determine match indicator
+            if "Wildcard match:" in op or "Double wildcard:" in op:
                 match_indicators += "*"  # Wildcard match
+            elif "Match:" in op:
                 match_indicators += "|"  # Exact match
             else:
                 match_indicators += "X"  # Substitution
             i += 1
             j += 1
+        elif "Insertion:" in op:
             aligned_str1 += "-"
+            if j in wildcard_offsets_str2:
+                aligned_str2 += f"[{str2[j]}]"
+            else:
+                aligned_str2 += str2[j]
             match_indicators += " "
             j += 1
+        elif "Deletion:" in op:
+            if i in wildcard_offsets_str1:
+                aligned_str1 += f"[{str1[i]}]"
+            else:
+                aligned_str1 += str1[i]
             aligned_str2 += "-"
             match_indicators += " "
             i += 1
     print(match_indicators)
     print(aligned_str2)
     print("\nLegend:")
+    print("| = exact match, * = wildcard match, X = substitution, - = gap (insertion/deletion), [c] = wildcard position")
     # Summary of wildcard matches
+    wildcard_matches = [op for op in operations if "Wildcard match:" in op or "Double wildcard:" in op]
     if wildcard_matches:
         print("\nWildcard matches:")
         for match in wildcard_matches:
 # Example usage
 if __name__ == "__main__":
+    # Example 1: "hello" vs "hello" with no wildcards
+    print_match_summary("hello", "hello")
+    # Example 2: "hello" vs "hallo" with no wildcards - expect distance of 1
+    print_match_summary("hello", "hallo")
+    # Example 3: "hello" with 3rd position (index 2) as wildcard vs "hallo" - expect distance of 1 (the 'e'/'a' mismatch is at index 1, which is not a wildcard)
+    print_match_summary("hello", "hallo", wildcard_offsets_str1=[2])
+    # Example 4: "hello" vs "hillo" with 2nd position (index 1) as wildcard in str2 - expect distance of 0
+    print_match_summary("hello", "hillo", wildcard_offsets_str2=[1])
+    # Example 5: Multiple wildcards in str1
+    print_match_summary("hello", "haxyz", wildcard_offsets_str1=[2, 3, 4])
+    # Example 6: Wildcards in both strings at different positions
+    print_match_summary("hello", "howdy", wildcard_offsets_str1=[2], wildcard_offsets_str2=[3, 4])

main.py CHANGED Viewed

@@ -106,7 +106,12 @@ def compile(compiler, flags, source):
                 assert False, f"Unknown reloc {s}"
     relocs_byte_range = [range(r["Offset"], r["Offset"] + reloc_type2size(r["Type"]["Name"])) for r in json_relocs]
     print(f"relocs: {relocs_byte_range}")
     if result.returncode == 0:
         return json_relocs, compiled_bytes, compile_output, disassembly

                 assert False, f"Unknown reloc {s}"
     relocs_byte_range = [range(r["Offset"], r["Offset"] + reloc_type2size(r["Type"]["Name"])) for r in json_relocs]
+    # Flatten relocs_byte_range
+    relocs_byte_range = [i for r in relocs_byte_range for i in r]
     print(f"relocs: {relocs_byte_range}")
+    print(print_match_summary(b"lol", relocs_byte_range, wildcard_offsets_str2=relocs_byte_range))
     if result.returncode == 0:
         return json_relocs, compiled_bytes, compile_output, disassembly