decompilation-metrics

Running

File size: 6,940 Bytes

52b18ab

def levenshtein_with_wildcards(str1, str2, wildcard='?', verbose=False):
    """
    Calculate the Levenshtein distance between two strings with support for wildcards.
    
    Args:
        str1 (str): The first string.
        str2 (str): The second string.
        wildcard (str, optional): The wildcard character. Defaults to '?'.
        verbose (bool, optional): If True, prints the DP matrix and explains the process.
        
    Returns:
        int: The Levenshtein distance between the two strings.
        list: If verbose=True, also returns a list of operations performed.
    """
    m, n = len(str1), len(str2)
    
    # Create a matrix of size (m+1) x (n+1)
    dp = [[0] * (n + 1) for _ in range(m + 1)]
    
    # Initialize the first row and column
    for i in range(m + 1):
        dp[i][0] = i
    
    for j in range(n + 1):
        dp[0][j] = j
    
    # Fill the dp matrix
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            # If either character is a wildcard, treat it as a match (cost = 0)
            if str1[i - 1] == wildcard or str2[j - 1] == wildcard:
                dp[i][j] = dp[i - 1][j - 1]  # No cost for wildcard matches
            else:
                cost = 0 if str1[i - 1] == str2[j - 1] else 1
                dp[i][j] = min(
                    dp[i - 1][j] + 1,      # deletion
                    dp[i][j - 1] + 1,      # insertion
                    dp[i - 1][j - 1] + cost  # substitution
                )
    
    if verbose:
        operations = explain_match(str1, str2, dp, wildcard)
        return dp[m][n], operations
    
    return dp[m][n]

def explain_match(str1, str2, dp, wildcard='?'):
    """
    Traces the optimal alignment path and explains each step of the matching process.
    
    Args:
        str1 (str): The first string.
        str2 (str): The second string.
        dp (list): The dynamic programming matrix.
        wildcard (str, optional): The wildcard character. Defaults to '?'.
        
    Returns:
        list: A list of explanation strings for each operation performed.
    """
    m, n = len(str1), len(str2)
    operations = []
    
    # Find the optimal path
    i, j = m, n
    path = []
    
    while i > 0 or j > 0:
        path.append((i, j))
        
        if i == 0:
            j -= 1
        elif j == 0:
            i -= 1
        else:
            substitution_cost = dp[i-1][j-1]
            deletion_cost = dp[i-1][j]
            insertion_cost = dp[i][j-1]
            
            min_cost = min(substitution_cost, deletion_cost, insertion_cost)
            
            if min_cost == substitution_cost:
                i -= 1
                j -= 1
            elif min_cost == deletion_cost:
                i -= 1
            else:
                j -= 1
    
    path.append((0, 0))
    path.reverse()
    
    # Generate explanations for each step
    for idx in range(1, len(path)):
        prev_i, prev_j = path[idx-1]
        curr_i, curr_j = path[idx]
        
        # Diagonal move (match or substitution)
        if curr_i > prev_i and curr_j > prev_j:
            char1 = str1[curr_i-1]
            char2 = str2[curr_j-1]
            
            if char1 == wildcard or char2 == wildcard:
                wildcard_char = char1 if char1 == wildcard else char2
                match_char = char2 if char1 == wildcard else char1
                operations.append(f"Wildcard match: '{wildcard_char}' matches any character, here '{match_char}'")
            elif char1 == char2:
                operations.append(f"Match: '{char1}' matches '{char2}'")
            else:
                operations.append(f"Substitution: Replace '{char1}' with '{char2}'")
        
        # Horizontal move (insertion)
        elif curr_i == prev_i and curr_j > prev_j:
            operations.append(f"Insertion: Insert '{str2[curr_j-1]}'")
        
        # Vertical move (deletion)
        elif curr_i > prev_i and curr_j == prev_j:
            operations.append(f"Deletion: Delete '{str1[curr_i-1]}'")
    
    return operations

def print_match_summary(str1, str2, wildcard='?'):
    """
    Prints a summary of the match between two strings, highlighting wildcards.
    
    Args:
        str1 (str): The first string.
        str2 (str): The second string.
        wildcard (str, optional): The wildcard character. Defaults to '?'.
    """
    distance, operations = levenshtein_with_wildcards(str1, str2, wildcard, verbose=True)
    
    print(f"Comparing '{str1}' and '{str2}' (wildcard: '{wildcard}')")
    print(f"Edit distance: {distance}")
    print("\nMatch process:")
    
    for i, op in enumerate(operations):
        print(f"Step {i+1}: {op}")
    
    # Visual representation
    alignment = []
    i, j = 0, 0
    aligned_str1 = ""
    aligned_str2 = ""
    match_indicators = ""
    
    for op in operations:
        if "match" in op or "Match" in op or "Substitution" in op:
            aligned_str1 += str1[i]
            aligned_str2 += str2[j]
            
            if "Wildcard" in op:
                match_indicators += "*"  # Wildcard match
            elif "Match" in op:
                match_indicators += "|"  # Exact match
            else:
                match_indicators += "X"  # Substitution
            
            i += 1
            j += 1
        elif "Insertion" in op:
            aligned_str1 += "-"
            aligned_str2 += str2[j]
            match_indicators += " "
            j += 1
        elif "Deletion" in op:
            aligned_str1 += str1[i]
            aligned_str2 += "-"
            match_indicators += " "
            i += 1
    
    print("\nAlignment:")
    print(aligned_str1)
    print(match_indicators)
    print(aligned_str2)
    print("\nLegend:")
    print("| = exact match, * = wildcard match, X = substitution, - = gap (insertion/deletion)")
    
    # Summary of wildcard matches
    wildcard_matches = [op for op in operations if "Wildcard" in op]
    if wildcard_matches:
        print("\nWildcard matches:")
        for match in wildcard_matches:
            print(f"- {match}")
    
    return distance, operations

# Example usage
if __name__ == "__main__":
    # Basic examples
    print_match_summary("hello", "hello")       # 0 (identical strings)
    print_match_summary("hello", "hallo")       # 1 (one substitution)
    print_match_summary("he?lo", "hello")       # 0 (wildcard matches 'l')
    print_match_summary("he?lo", "hallo")       # 0 (wildcard matches 'a')
    print_match_summary("h?llo", "hello")       # 0 (wildcard matches 'e')
    print_match_summary("h?llo", "hillo")       # 0 (wildcard matches 'i')
    print_match_summary("c?t", "cat")           # 0 (wildcard matches 'a')
    print_match_summary("c?t", "cut")           # 0 (wildcard matches 'u')
    print_match_summary("w?rd", "word")         # 0 (wildcard matches 'o')
    print_match_summary("d?g", "dog")           # 0 (wildcard matches 'o')