import json
import re

# A backslash that escapes a single or double quote, e.g. the \' in isn\'t.
_ESCAPED_QUOTE_RE = re.compile(r"""\\(["'])""")

# Both marker spellings that can wrap a highlighted span. DOTALL lets a
# highlight run across line breaks instead of leaving raw markers behind.
_HIGHLIGHT_PATTERNS = (
    re.compile(r'\[\[highlight_start\]\](.*?)\[\[highlight_end\]\]', re.DOTALL),
    re.compile(r'\[\[start_highlight\]\](.*?)\[\[end_highlight\]\]', re.DOTALL),
)


def debug_text(text, label="Text"):
    """Print a short diagnostic summary of *text* to stdout.

    The summary shows the length, the first 100 characters, and whether each
    of the two opening highlight markers occurs in the text.

    Args:
        text: The string to inspect.
        label: Heading used in the banner line of the output.
    """
    print(f"\n--- DEBUG {label} ---")
    print(f"Length: {len(text)}")
    print(f"First 100 chars: {text[:100]}")
    print(f"Contains highlight_start: {'[[highlight_start]]' in text}")
    print(f"Contains start_highlight: {'[[start_highlight]]' in text}")
    print("-------------------------\n")


def clean_json_text(text):
    r"""Strip backslash escaping from quotes in text that came out of JSON.

    Example: ``the sky isn\'t falling`` becomes ``the sky isn't falling``.

    The previous implementation serialised the text with ``json.dumps`` and
    parsed it straight back with ``json.loads``, which always reproduces the
    input, so no escape was ever removed. Only escaped quotes are rewritten
    here; other backslash sequences (``\n``, ``\\`` and so on) are left alone
    so that text which legitimately contains backslashes is not altered.

    Args:
        text: The text to clean.

    Returns:
        The text with ``\'`` and ``\"`` replaced by the bare quote character.
        A value that is not a ``str`` is returned unchanged.
    """
    if not isinstance(text, str):
        # The old code swallowed the TypeError raised for non-strings and
        # handed the value back as-is; callers may rely on that.
        return text
    return _ESCAPED_QUOTE_RE.sub(r"\1", text)


def process_highlights(text):
    """Resolve highlight markers in *text*.

    Escaped quotes are cleaned first via ``clean_json_text``. Then every
    ``[[highlight_start]]...[[highlight_end]]`` and
    ``[[start_highlight]]...[[end_highlight]]`` pair is replaced by the text
    it encloses. A highlighted span may contain line breaks.

    Args:
        text: The text that may contain highlight markers.

    Returns:
        The text with all complete marker pairs resolved. Unpaired markers
        are left in place.
    """
    # NOTE(review): the replacement keeps only the captured text, so the
    # markers are removed without adding any markup. Presumably wrapper HTML
    # belongs around \1 -- confirm against the intended output.
    replacement = r'\1'

    highlighted_text = clean_json_text(text)
    for pattern in _HIGHLIGHT_PATTERNS:
        highlighted_text = pattern.sub(replacement, highlighted_text)
    return highlighted_text


def process_table_with_highlights(markdown_table):
    """Resolve highlights in a markdown table and convert it to HTML.

    Each line is processed separately so that a highlight can never swallow
    a row boundary; the lines are then re-joined and passed to
    ``convert_markdown_table_to_html``.

    Args:
        markdown_table: The markdown table source.

    Returns:
        Whatever ``convert_markdown_table_to_html`` returns for the processed
        table text.
    """
    processed_lines = [
        process_highlights(line)
        for line in markdown_table.strip().split('\n')
    ]
    return convert_markdown_table_to_html('\n'.join(processed_lines))


# NOTE(review): `convert_markdown_table_to_html` (called above) starts at this
# point in the file, but everything after its guard clause is damaged: the
# string literals that built the HTML appear to have lost their markup, so the
# function cannot be restored from what is visible here. Its intact prefix is
# kept below for reference; recover the full definition from version control.
#
# def convert_markdown_table_to_html(markdown_text):
#     """
#     Converts a markdown table to an HTML table.
#     """
#     # Clean JSON escaping
#     markdown_text = clean_json_text(markdown_text)
#     lines = markdown_text.strip().split('\n')
#     table_lines = [line for line in lines if line.strip().startswith('|')]
#     if len(table_lines) < 2:  # Need at least header and separator
#         return markdown_text  # Return original if not a proper table
#     html = '
{processed_cell} | ' html += '
---|
{processed_cell} | ' html += '
{processed_cell} | ' html += '
{insufficient_reason}
" if insufficient_reason else "The context may not contain enough information to fully answer the question, or the question might be ambiguous. Models should ideally indicate this limitation or refuse to answer.
" html += f"""