Spaces:
Running
on
Zero
Running
on
Zero
File size: 8,547 Bytes
8a142a6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 |
import re
import json
def debug_text(text, label="Text"):
    """Print a short diagnostic summary of ``text`` to stdout.

    The summary shows the text length, its first 100 characters, and
    whether each of the two opening highlight markers occurs in it.

    Parameters:
    - text: The string to inspect
    - label: Name shown in the banner line of the summary
    """
    print(f"\n--- DEBUG {label} ---")
    size = len(text)
    print(f"Length: {size}")
    preview = text[:100]
    print(f"First 100 chars: {preview}")
    # Report both opening-marker spellings the highlight processor accepts.
    for marker_name in ("highlight_start", "start_highlight"):
        present = f"[[{marker_name}]]" in text
        print(f"Contains {marker_name}: {present}")
    print("-------------------------\n")
# One or more backslashes immediately followed by a single or double quote.
_ESCAPED_QUOTE_RE = re.compile(r"""\\+(['"])""")


def clean_json_text(text):
    r"""
    Strip leftover backslash escaping from quote characters.

    Handles text that came from JSON and still carries literal escapes in
    front of quotes, such as ``the sky isn\'t falling``, which becomes
    ``the sky isn't falling``.

    The previous implementation round-tripped the text through
    ``json.dumps`` / ``json.loads``, which always yields the input
    unchanged, so no unescaping ever happened.

    Parameters:
    - text: The text to clean

    Returns:
    The text with every run of backslashes directly before ``'`` or ``"``
    removed. All other backslashes (for example a literal ``\n`` sequence)
    are left alone. Non-string input is returned unchanged.
    """
    if not isinstance(text, str):
        # Best-effort helper: pass through anything that is not text.
        return text
    if '\\' not in text:
        # Fast path: nothing that could be an escape sequence.
        return text
    return _ESCAPED_QUOTE_RE.sub(r'\1', text)
def process_highlights(text):
    """
    Convert highlight markers in text into HTML highlight spans.

    The text is first passed through ``clean_json_text``. Then each
    ``[[highlight_start]]...[[highlight_end]]`` pair and each
    ``[[start_highlight]]...[[end_highlight]]`` pair is replaced by
    ``<span class="highlight">...</span>``.

    Parameters:
    - text: The text that may contain highlight markers

    Returns:
    The text with every complete marker pair replaced by a span. Markers
    without a matching partner are left in place.
    """
    text = clean_json_text(text)

    replacement = r'<span class="highlight">\1</span>'
    marker_patterns = (
        r'\[\[highlight_start\]\](.*?)\[\[highlight_end\]\]',
        r'\[\[start_highlight\]\](.*?)\[\[end_highlight\]\]',
    )
    for pattern in marker_patterns:
        # DOTALL lets a highlight span several lines; without it "." stops
        # at a newline and the markers would be emitted as literal text.
        text = re.sub(pattern, replacement, text, flags=re.DOTALL)
    return text
def process_table_with_highlights(markdown_table):
    """
    Render a markdown table that may contain highlight markers as HTML.

    Highlights are applied one line at a time, and the re-joined lines are
    then handed to ``convert_markdown_table_to_html``.

    Parameters:
    - markdown_table: The markdown table text
    """
    highlighted_rows = [
        process_highlights(row)
        for row in markdown_table.strip().split('\n')
    ]
    return convert_markdown_table_to_html('\n'.join(highlighted_rows))
def _split_table_row(row):
    """Return the raw cell strings of one pipe-delimited table row."""
    # The piece before the leading pipe is never a cell.
    cells = row.split('|')[1:]
    if row.strip().endswith('|'):
        # A closing pipe leaves one trailing non-cell piece; drop it.
        cells = cells[:-1]
    return cells


def _render_table_row(row, cell_tag):
    """Render one table row as ``<tr>`` using ``cell_tag`` (``th``/``td``)."""
    cells_html = ''.join(
        f'<{cell_tag}>{process_highlights(cell.strip())}</{cell_tag}>'
        for cell in _split_table_row(row)
    )
    return f'<tr>{cells_html}</tr>'


def convert_markdown_table_to_html(markdown_text):
    """
    Converts a markdown table to an HTML table.

    Only lines whose stripped form starts with ``|`` are treated as table
    rows; every other line is ignored. When the second table row contains
    ``---`` it is taken as the header separator: the first row is rendered
    inside ``<thead>`` and the separator itself is skipped. Otherwise all
    rows are rendered as data rows. Each cell is stripped and passed
    through ``process_highlights``.

    Parameters:
    - markdown_text: The markdown text containing the table

    Returns:
    A ``<table class="md-table">`` HTML string, or the text (after
    ``clean_json_text``) when fewer than two table rows are found.
    """
    markdown_text = clean_json_text(markdown_text)

    table_lines = [
        line
        for line in markdown_text.strip().split('\n')
        if line.strip().startswith('|')
    ]
    if len(table_lines) < 2:  # Need at least header and separator
        return markdown_text  # Return the text as-is if not a proper table

    parts = ['<table class="md-table">']
    if '---' in table_lines[1]:
        header_html = _render_table_row(table_lines[0], 'th')
        parts.append(f'<thead>{header_html}</thead>')
        body_lines = table_lines[2:]  # Skip the separator row at index 1
    else:
        # No header row, treat all rows as data
        body_lines = table_lines

    parts.append('<tbody>')
    parts.extend(_render_table_row(line, 'td') for line in body_lines)
    parts.append('</tbody></table>')
    return ''.join(parts)
def _render_context_item(context_text, extra_class=""):
    """Render one context chunk as a ``context-item`` div."""
    # A missing or None content value is rendered as an empty item rather
    # than raising on the substring checks below.
    context_text = context_text or ''
    # Check for markdown table format (both standard and newline format)
    looks_like_table = '|' in context_text and (
        '\n|' in context_text or '\n-' in context_text
    )
    if looks_like_table:
        inner_html = process_table_with_highlights(context_text)
    else:
        inner_html = process_highlights(context_text)
    return f'<div class="context-item{extra_class}">{inner_html}</div>'


def get_context_html(example, show_full=False):
    """
    Formats the context chunks into an HTML string for display using specific CSS classes.
    Includes an alert for insufficient context and applies highlighting.

    Parameters:
    - example: The example data containing contexts. Keys read here are
      "insufficient", "insufficient_reason", "full_contexts" and "contexts";
      each context item is read via its "content" and "is_primary" keys.
    - show_full: Boolean indicating whether to show full context

    Returns:
    The HTML string: an optional insufficient-context alert followed by a
    ``context-items-container`` div holding one ``context-item`` div per
    chunk, or a single placeholder item when there is nothing to show.
    """
    parts = []

    # Add insufficient context warning if needed
    if example.get("insufficient", False):
        insufficient_reason = example.get("insufficient_reason", "")
        if insufficient_reason:
            reason_html = f"<p>{insufficient_reason}</p>"
        else:
            reason_html = "<p>The context may not contain enough information to fully answer the question, or the question might be ambiguous. Models should ideally indicate this limitation or refuse to answer.</p>"
        parts.append(
            '\n<div class="insufficient-alert">\n'
            '<strong>\n'
            '<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align: middle; margin-right: 5px;">\n'
            '<path d="m21.73 18-8-14a2 2 0 0 0-3.48 0l-8 14A2 2 0 0 0 4 21h16a2 2 0 0 0 1.73-3Z"></path>\n'
            '<line x1="12" y1="9" x2="12" y2="13"></line>\n'
            '<line x1="12" y1="17" x2="12.01" y2="17"></line>\n'
            '</svg>\n'
            'Insufficient Context\n'
            '</strong>\n'
            f'{reason_html}\n'
            '</div>\n'
        )

    # Create container div for all context items
    parts.append('<div class="context-items-container">')

    full_contexts = example.get("full_contexts")
    contexts = example.get("contexts")
    if show_full and full_contexts:
        # Full view: one plain item per chunk, no primary styling
        for context_item in full_contexts:
            parts.append(_render_context_item(context_item.get('content', '')))
    elif contexts:
        # Highlighted view: primary chunks get an extra CSS class
        for context_item in contexts:
            is_primary = context_item.get('is_primary', False)
            extra_class = " primary-context" if is_primary else ""
            parts.append(
                _render_context_item(context_item.get('content', ''), extra_class)
            )
    else:
        # If no contexts available, show a message
        parts.append('<div class="context-item">No context available. Try toggling to full context view.</div>')

    # Close the container div
    parts.append('</div>')
    return ''.join(parts)