File size: 4,743 Bytes
9b5ca29 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import json
import re
# pylatexenc is treated as optional at import time: when it is missing the
# module still loads and only prints a hint. The imported names then stay
# undefined, so code paths that call utf8tolatex will fail with NameError.
try:
    from pylatexenc.latexencode import utf8tolatex, UnicodeToLatexEncoder
except ImportError:
    # Catch only import failures; a bare ``except:`` would also swallow
    # KeyboardInterrupt/SystemExit and hide unrelated errors.
    print("Warning: Missing pylatexenc, please do pip install pylatexenc")
def _print_response(response_type: str, theorem_name: str, content: str, separator: str = "=" * 50) -> None:
    """Pretty-print one response produced during the video generation process.

    Writes a banner (separator, header line, separator), then the content, then a
    closing separator to standard output, with blank lines around the banner parts.

    Args:
        response_type (str): Label for the kind of response (e.g., 'Scene Plan', 'Implementation Plan')
        theorem_name (str): Name of the theorem the response belongs to
        content (str): The text printed between the banner and the closing separator
        separator (str, optional): Rule line used around the header and after the content.
            Defaults to 50 equals signs.

    Returns:
        None
    """
    # Each entry is emitted with its own print() call, in this order.
    chunks = (
        f"\n{separator}",
        f"{response_type} for {theorem_name}:",
        f"{separator}\n",
        content,
        f"\n{separator}",
    )
    for chunk in chunks:
        print(chunk)
def _extract_code(response_text: str) -> str:
    """Pull the Python source out of a markdown-fenced text response.

    Every block delimited by a ```python opening fence and a closing ``` fence is
    collected, and the blocks are joined with one blank line between them. A response
    that contains no ``` fence at all is treated as raw code and returned unchanged.
    A response that has fences but no ```python block yields an empty string.

    Args:
        response_text (str): The text response that may contain fenced code blocks

    Returns:
        str: The joined code blocks, the full response when it has no fences,
            or "" when fences exist but none is a python block
    """
    python_blocks = re.findall(r'```python\n(.*?)\n```', response_text, re.DOTALL)
    if python_blocks:
        return "\n\n".join(python_blocks)

    # Fences are present but none matched the python pattern: nothing usable.
    if "```" in response_text:
        return ""

    # No fences at all, so the whole response is taken to be the code.
    return response_text
def _json_candidates(response: str):
    """Yield the strings to try parsing as JSON: the raw response, then a fenced block."""
    yield response
    # Prefer an explicit ```json fence; fall back to a generic ``` fence.
    fenced = re.search(r'```json\n(.*?)\n```', response, re.DOTALL)
    if not fenced:
        fenced = re.search(r'```\n(.*?)\n```', response, re.DOTALL)
    if fenced:
        yield fenced.group(1)


def extract_json(response: str) -> dict:
    """Extract and parse JSON content from a text response.

    The response is first parsed as JSON directly. If that fails, the content of the
    first ```json fenced block (or, when there is none, the first generic ``` fenced
    block) is parsed instead. If nothing parses, a warning is printed and an empty
    list is returned; malformed JSON inside a fenced block no longer raises.

    Args:
        response (str): The text response containing JSON content

    Returns:
        dict: The parsed JSON value (whatever type the JSON document encodes), or an
            empty list if no valid JSON could be extracted
    """
    for candidate in _json_candidates(response):
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            # Not valid JSON; move on to the next candidate (if any).
            continue

    # Every candidate failed: report once and return the documented fallback.
    print(f"Warning: Failed to extract valid JSON content from {response}")
    return []
# Translation table from Unicode subscript/superscript characters to LaTeX.
# Keys are written as \u escapes so they cannot be corrupted by a mis-decoded
# save of this file (the literal characters had previously turned into mojibake,
# which collapsed the subscript keys into duplicates and broke the mapping).
_SCRIPT_TO_LATEX = str.maketrans({
    # SUBSCRIPT ZERO .. SUBSCRIPT NINE
    "\u2080": "_0", "\u2081": "_1", "\u2082": "_2", "\u2083": "_3", "\u2084": "_4",
    "\u2085": "_5", "\u2086": "_6", "\u2087": "_7", "\u2088": "_8", "\u2089": "_9",
    # SUBSCRIPT PLUS SIGN, SUBSCRIPT MINUS
    "\u208a": "_+", "\u208b": "_-",
    # SUPERSCRIPT ZERO .. SUPERSCRIPT NINE (one, two, three live in Latin-1)
    "\u2070": "^0", "\u00b9": "^1", "\u00b2": "^2", "\u00b3": "^3", "\u2074": "^4",
    "\u2075": "^5", "\u2076": "^6", "\u2077": "^7", "\u2078": "^8", "\u2079": "^9",
    # SUPERSCRIPT PLUS SIGN, SUPERSCRIPT MINUS
    "\u207a": "^+", "\u207b": "^-",
})


def _fix_unicode_to_latex(text: str, parse_unicode: bool = True) -> str:
    """Convert Unicode symbols to LaTeX source code.

    Unicode subscript and superscript digits and +/- signs are rewritten as
    ``_x`` / ``^x``. Each character is converted independently, so a run such as
    subscript "12" becomes ``_1_2``. Optionally the result is then passed through
    ``utf8tolatex`` for full Unicode to LaTeX conversion.

    Args:
        text (str): The text containing Unicode symbols to convert
        parse_unicode (bool, optional): Whether to perform full Unicode to LaTeX conversion
            with ``utf8tolatex`` after the subscript/superscript pass. Defaults to True.

    Returns:
        str: The text with Unicode symbols converted to LaTeX format

    Raises:
        NameError: If ``parse_unicode`` is true and ``utf8tolatex`` is not defined
            in this module (i.e. the pylatexenc import failed).
    """
    # Single pass over the string; replacements are ASCII and never re-matched.
    text = text.translate(_SCRIPT_TO_LATEX)
    if parse_unicode:
        text = utf8tolatex(text)
    return text
def extract_xml(response: str) -> str:
    """Return the XML payload embedded in a text response.

    Looks for the first block delimited by a ```xml opening fence and a closing ```
    fence and returns its content. When no such block exists, or the search itself
    raises, the response is handed back unchanged.

    Args:
        response (str): The text response that may contain a fenced XML block

    Returns:
        str: The content of the first ```xml block, or the full response if none is found
    """
    try:
        found = re.search(r'```xml\n(.*?)\n```', response, re.DOTALL)
    except Exception:
        # Best-effort: any failure while searching falls back to the raw input.
        return response
    return response if found is None else found.group(1)
|