File size: 4,743 Bytes
9b5ca29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import json
import re
try:
    from pylatexenc.latexencode import utf8tolatex, UnicodeToLatexEncoder
except:
    print("Warning: Missing pylatexenc, please do pip install pylatexenc")

def _print_response(response_type: str, theorem_name: str, content: str, separator: str = "=" * 50) -> None:
    """Print one response from the video generation process as a framed block.

    The output is a separator line, a "<response_type> for <theorem_name>:" header,
    another separator, the content itself and a closing separator, with blank
    lines around the content for readability.

    Args:
        response_type (str): Type of response (e.g., 'Scene Plan', 'Implementation Plan')
        theorem_name (str): Name of the theorem being processed
        content (str): The content to print
        separator (str, optional): Line used to frame the block. Defaults to 50 equals signs.

    Returns:
        None
    """
    header = f"{response_type} for {theorem_name}:"
    # Each entry is emitted with its own print call; the embedded "\n"
    # characters produce the blank lines around the header and the content.
    framed_lines = (
        f"\n{separator}",
        header,
        f"{separator}\n",
        content,
        f"\n{separator}",
    )
    for line in framed_lines:
        print(line)

def _extract_code(response_text: str) -> str:
    """Pull the Python source out of a text response.

    Every block fenced with ```python ... ``` is collected and the blocks are
    joined with a blank line between them. When the response contains no fence
    markers at all, the whole response is treated as code. When it contains
    fences but none of them is a ```python block, an empty string is returned.

    Args:
        response_text (str): The text response containing code blocks

    Returns:
        str: The joined ```python blocks, the full response if it has no fences,
            or an empty string otherwise
    """
    python_blocks = re.findall(r'```python\n(.*?)\n```', response_text, re.DOTALL)
    if python_blocks:
        return "\n\n".join(python_blocks)

    # Fences are present but none is a python block: there is nothing to extract.
    if "```" in response_text:
        return ""

    # No fences at all: the response itself is the code.
    return response_text

def extract_json(response: str) -> dict:
    """Extract and parse JSON content from a text response.

    Attempts to parse the response as JSON directly, then tries to extract JSON from code blocks
    if direct parsing fails. Any parsing failure, including a fenced block whose content is not
    valid JSON, results in a warning being printed and an empty list being returned instead of
    an exception.

    Args:
        response (str): The text response containing JSON content

    Returns:
        dict: The parsed JSON content (whatever value the JSON encodes, typically a dictionary),
            or an empty list if parsing fails

    Note:
        Will attempt to parse content between ```json markers first, then between generic ``` markers.
        The generic markers are only consulted when no ```json block is present.
    """
    # Fast path: the whole response is already valid JSON.
    try:
        return json.loads(response)
    except json.JSONDecodeError:
        pass

    # Otherwise look for a fenced block, preferring an explicit ```json fence.
    match = re.search(r'```json\n(.*?)\n```', response, re.DOTALL)
    if not match:
        match = re.search(r'```\n(.*?)\n```', response, re.DOTALL)

    if match:
        try:
            return json.loads(match.group(1))
        except json.JSONDecodeError:
            # The fenced content is not valid JSON either; fall through to the
            # shared failure path so the caller gets the documented empty list.
            pass

    print(f"Warning: Failed to extract valid JSON content from {response}")
    return []

def _fix_unicode_to_latex(text: str, parse_unicode: bool = True) -> str:
    """Convert Unicode symbols to LaTeX source code.

    Converts Unicode subscripts and superscripts (digits, plus and minus) to LaTeX
    ``_x`` / ``^x`` format, with optional full Unicode parsing through ``utf8tolatex``.

    Args:
        text (str): The text containing Unicode symbols to convert
        parse_unicode (bool, optional): Whether to perform full Unicode to LaTeX conversion
            by passing the text through ``utf8tolatex``. Defaults to True.

    Returns:
        str: The text with Unicode symbols converted to LaTeX format
    """
    # The characters are written as escapes rather than literals so the table
    # cannot be silently corrupted if the file is re-encoded.

    # Map of unicode subscripts to latex format (U+2080..U+2089 digits, U+208A plus, U+208B minus)
    subscripts = {
        "\u2080": "_0", "\u2081": "_1", "\u2082": "_2", "\u2083": "_3", "\u2084": "_4",
        "\u2085": "_5", "\u2086": "_6", "\u2087": "_7", "\u2088": "_8", "\u2089": "_9",
        "\u208a": "_+", "\u208b": "_-",
    }
    # Map of unicode superscripts to latex format (1, 2 and 3 are the Latin-1
    # code points U+00B9, U+00B2, U+00B3; the rest are in U+2070..U+207B)
    superscripts = {
        "\u2070": "^0", "\u00b9": "^1", "\u00b2": "^2", "\u00b3": "^3", "\u2074": "^4",
        "\u2075": "^5", "\u2076": "^6", "\u2077": "^7", "\u2078": "^8", "\u2079": "^9",
        "\u207a": "^+", "\u207b": "^-",
    }

    # Every key is a single character and no replacement contains a key, so a
    # single translate pass is equivalent to replacing each character in turn.
    text = text.translate(str.maketrans({**subscripts, **superscripts}))

    if parse_unicode:
        # utf8tolatex comes from the pylatexenc import at the top of the file.
        text = utf8tolatex(text)

    return text

def extract_xml(response: str) -> str:
    """Return the XML payload embedded in a text response.

    Looks for the first block fenced with ```xml ... ``` and returns its content.
    If there is no such block, or the search itself fails, the response is
    handed back unchanged.

    Args:
        response (str): The text response containing XML content

    Returns:
        str: The extracted XML content, or the full response if no XML blocks found
    """
    try:
        xml_block = re.search(r'```xml\n(.*?)\n```', response, re.DOTALL)
    except Exception:
        # Best effort: an input the regex cannot search is returned as-is.
        return response
    return xml_block.group(1) if xml_block else response