# File size: 8,091 Bytes
# 7e96e8d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
import re

def create_matching_pattern(word):
    """Return the regex source used to find ``word`` inside a sentence.

    The shape of the pattern depends on what the word contains:

    * a space or any of ``& - / . ' ( ) [ ] $ € £ ¥ +``: the escaped word,
      with no word-boundary anchors;
    * otherwise, a trailing ``%``: the escaped text before the ``%`` with a
      leading ``\\b``, followed by optional whitespace and a literal ``%``;
    * otherwise, at least one ASCII digit and one ASCII letter: the escaped
      word, with no word-boundary anchors;
    * anything else: the escaped word wrapped in ``\\b`` on both sides.

    Args:
        word (str): The literal text to search for.

    Returns:
        str: A regular-expression pattern string (not compiled).
    """
    literal = re.escape(word)

    # Phrases and words carrying punctuation/currency symbols are matched
    # verbatim.
    contains_special = ' ' in word or any(ch in '&-/.\'()[]$€£¥+' for ch in word)
    if contains_special:
        return rf'{literal}'

    # Percentages tolerate whitespace between the number and the sign.
    if word.endswith('%'):
        number = word[:-1]
        return rf'\b{re.escape(number)}\s*%'

    # Mixed letter/digit tokens are matched verbatim as well.
    has_digit = re.search(r'[0-9]', word)
    if has_digit and re.search(r'[a-zA-Z]', word):
        return rf'{literal}'

    # Plain words must stand alone.
    return rf'\b{literal}\b'

def highlight_common_words(common_words, sentences, title):
    """
    Highlight common words in sentences by adding color-coded background and unique IDs.

    All words are matched in a single regex pass over the raw sentence text.
    Markup inserted for one word is therefore never rescanned while looking
    for another word; applying one substitution per word would let a word
    such as "style", "span" or "bold" match inside the generated tags and
    corrupt the HTML. When two words could match at the same position, the
    one listed first in ``common_words`` wins. The "N. " numbering prefix is
    added after highlighting, so it is never highlighted itself.

    Matching is case-insensitive. Each distinct word gets one color from a
    fixed palette (cycled when there are more words than colors). Empty words
    are ignored.

    Args:
        common_words (list of tuples): List of tuples where each tuple contains a word's index and the word.
            The index is the number shown in the badge in front of each match.
        sentences (list of str): List of sentences to search through.
        title (str): The title for the HTML output.

    Returns:
        str: HTML string with the highlighted sentences.

    Note:
        ``title`` and the sentence text are inserted into the HTML as-is
        (they are not HTML-escaped).
    """
    pastel_colors = ['#E199C6', '#7FB3D5', '#E57373', '#B388EB', '#80D9AA', '#F0B66B',
                     "#73D9A0", "#9A89EB", "#E38AD4", "#7FAFDB", "#DDAF8C", "#DA7FC1",
                     "#65CFA5", "#B38FDE", "#E6C97A"]

    color_map = {}
    badge_for_group = {}  # regex group name -> (badge index, background color)
    alternatives = []
    for position, (index, word) in enumerate(common_words):
        if not word:
            # An empty word would yield a zero-width pattern that matches at
            # every word boundary.
            continue
        if word not in color_map:
            color_map[word] = pastel_colors[len(color_map) % len(pastel_colors)]
        group_name = f"w{position}"
        badge_for_group[group_name] = (index, color_map[word])
        # One named group per entry lets the replacement callback tell which
        # word matched. This relies on create_matching_pattern() returning a
        # pattern without capturing groups of its own.
        alternatives.append(f"(?P<{group_name}>{create_matching_pattern(word)})")

    combined_pattern = None
    if alternatives:
        combined_pattern = re.compile("|".join(alternatives), flags=re.IGNORECASE)

    def wrap_match(match):
        """Wrap one matched word in its colored span with a numbered badge."""
        index, color = badge_for_group[match.lastgroup]
        return (
            f'<span style="background-color: {color}; font-weight: bold;'
            f' padding: 2px 4px; border-radius: 2px; position: relative;">'
            f'<span style="background-color: black; color: white; border-radius: 50%;'
            f' padding: 2px 5px; margin-right: 5px;">{index}</span>'
            f'{match.group(0)}'
            f'</span>'
        )

    highlighted_html = []
    for idx, sentence in enumerate(sentences, start=1):
        highlighted_sentence = sentence
        if combined_pattern is not None:
            highlighted_sentence = combined_pattern.sub(wrap_match, sentence)
        highlighted_html.append(f"{idx}. {highlighted_sentence}")

    # Format the HTML output with the title
    final_html = "<br><br>".join(highlighted_html)
    return f'''
    <div style="border: solid 1px #FFFFFF; padding: 16px; background-color: #000000; color: #FFFFFF; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); border-radius: 8px;">
        <h3 style="margin-top: 0; font-size: 1em; color: #FFFFFF;">{title}</h3>
        <div style="background-color: #333333; line-height: 1.6; padding: 15px; border-radius: 8px; color: #FFFFFF;">{final_html}</div>
    </div>
    '''

def highlight_common_words_dict(common_words, sentences, title):
    """
    Highlight common words in sentences (from a dictionary) by adding color-coded background and unique IDs.

    All words are matched in a single regex pass over the raw sentence text.
    Markup inserted for one word is therefore never rescanned while looking
    for another word; applying one substitution per word would let a word
    such as "style", "span" or "bold" match inside the generated tags and
    corrupt the HTML. When two words could match at the same position, the
    one listed first in ``common_words`` wins. The "N. " numbering prefix is
    added after highlighting, so it is never highlighted itself.

    Matching is case-insensitive. Each distinct word gets one color from a
    fixed palette (cycled when there are more words than colors). Empty words
    are ignored.

    Args:
        common_words (list of tuples): List of tuples where each tuple contains a word's index and the word.
            The index is the number shown in the badge in front of each match.
        sentences (dict): A dictionary of sentences where the key is the sentence and the value is an entailment score.
        title (str): The title for the HTML output.

    Returns:
        str: HTML string with the highlighted sentences and their entailment scores.

    Note:
        ``title``, the sentence text and the scores are inserted into the
        HTML as-is (they are not HTML-escaped).
    """
    pastel_colors = ['#E199C6', '#7FB3D5', '#E57373', '#B388EB', '#80D9AA', '#F0B66B',
                     "#73D9A0", "#9A89EB", "#E38AD4", "#7FAFDB", "#DDAF8C", "#DA7FC1",
                     "#65CFA5", "#B38FDE", "#E6C97A"]

    color_map = {}
    badge_for_group = {}  # regex group name -> (badge index, background color)
    alternatives = []
    for position, (index, word) in enumerate(common_words):
        if not word:
            # An empty word would yield a zero-width pattern that matches at
            # every word boundary.
            continue
        if word not in color_map:
            color_map[word] = pastel_colors[len(color_map) % len(pastel_colors)]
        group_name = f"w{position}"
        badge_for_group[group_name] = (index, color_map[word])
        # One named group per entry lets the replacement callback tell which
        # word matched. This relies on create_matching_pattern() returning a
        # pattern without capturing groups of its own.
        alternatives.append(f"(?P<{group_name}>{create_matching_pattern(word)})")

    combined_pattern = None
    if alternatives:
        combined_pattern = re.compile("|".join(alternatives), flags=re.IGNORECASE)

    def wrap_match(match):
        """Wrap one matched word in its colored span with a numbered badge."""
        index, color = badge_for_group[match.lastgroup]
        return (
            f'<span style="background-color: {color}; font-weight: bold;'
            f' padding: 1px 2px; border-radius: 2px; position: relative;">'
            f'<span style="background-color: black; color: white; border-radius: 50%;'
            f' padding: 1px 3px; margin-right: 3px; font-size: 0.8em;">{index}</span>'
            f'{match.group(0)}'
            f'</span>'
        )

    highlighted_html = []
    # Process each sentence and its score
    for idx, (sentence, score) in enumerate(sentences.items(), start=1):
        highlighted_sentence = sentence
        if combined_pattern is not None:
            highlighted_sentence = combined_pattern.sub(wrap_match, sentence)
        highlighted_sentence = f"{idx}. {highlighted_sentence}"

        # Add the entailment score
        highlighted_html.append(
            f'<div style="margin-bottom: 5px;">'
            f'{highlighted_sentence}'
            f'<div style="display: inline-block; margin-left: 5px; padding: 3px 5px; border-radius: 3px; '
            f'background-color: #333333; color: white; font-size: 0.9em;">'
            f'Entailment Score: {score}</div></div>'
        )

    # Format the HTML output with the title
    final_html = "<br>".join(highlighted_html)
    return f'''
    <div style="background-color: #000000; color: #FFFFFF;border: solid 1px #FFFFFF; border-radius: 8px;">
        <h3 style="margin-top: 0; font-size: 1em; color: #FFFFFF;">{title}</h3>
        <div style="background-color: #333333; line-height: 1.6; padding: 15px; border-radius: 8px; color: #FFFFFF;">{final_html}</div>
    </div>
    '''

def reparaphrased_sentences_html(sentences):
    """
    Render sentences as a numbered list inside a styled HTML container.

    Each sentence is prefixed with its 1-based position ("1. ", "2. ", ...)
    and consecutive sentences are separated by "<br><br>". The sentence text
    is inserted as-is.

    Args:
        sentences (list of str): Sentences to number and display.

    Returns:
        str: HTML string with the numbered sentences.
    """
    numbered = [f"{position}. {text}" for position, text in enumerate(sentences, start=1)]
    body = "<br><br>".join(numbered)

    # Outer frame plus an inner panel that holds the numbered sentences.
    return f'''
    <div style="border: solid 1px #FFFFFF; background-color: #000000; color: #FFFFFF;
        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); border-radius: 8px;">
        <div style="background-color: #333333; line-height: 1.6; padding: 15px; border-radius: 8px;">{body}</div>
    </div>
    '''

if __name__ == "__main__":
    # Demo: run both highlighters on small samples and print the resulting HTML.
    common_words = [
        (1, "highlight"),
        (2, "numbering"),
        (3, "S&P 500"),
    ]
    sentences = [
        "This is a test to highlight words.",
        "Numbering is important for clarity.",
        "The S&P 500 index rose 2% today.",
    ]
    sentences_with_scores = {
        "Highlight words in this text.": 0.95,
        "Number sentences for clarity.": 0.8,
        "The S&P 500 index is a market benchmark.": 0.88,
    }

    # List-based variant
    highlighted_html = highlight_common_words(
        common_words, sentences, "Test Highlighting"
    )
    print(highlighted_html)

    # Dictionary-based variant (sentence -> entailment score)
    highlighted_html_dict = highlight_common_words_dict(
        common_words, sentences_with_scores, "Test Dict Highlighting"
    )
    print(highlighted_html_dict)