File size: 8,747 Bytes
e2685f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import json
import sys


# Add src to path to import our modules.
# The entry is a relative path, so it resolves against the current working
# directory at import time.
sys.path.insert(0, "src")

from handlers.tool_call_handler import ToolCallAssembler

# Import standardized test utilities
from tests.test_utils import get_test_logger, create_test_results

# Initialize standardized test logger (module-level; shared by every test
# function in this file and by the __main__ runner)
logger = get_test_logger(__name__)


def test_actual_streaming_error():
    """Test the JSON repair functionality with the actual streaming error pattern.

    The fixture is a tool-call argument string in which the first object's
    base64 value is cut short by a stray quote and is immediately followed by
    a second JSON object with the same keys. The test:

    1. confirms ``json.loads`` rejects the raw fixture,
    2. passes it through ``ToolCallAssembler._attempt_json_repair``,
    3. asserts the repaired text parses and contains ``task_description``
       (with the expected value) and ``calendar_file_content``.
    """

    logger.start_test("Testing JSON repair functionality with actual streaming error")

    # This is based on the actual error from the logs at character 787
    # The pattern shows base64 data ending abruptly with a quote and then a duplicate JSON object
    # NOTE(review): the literal contains a line break inside the second base64
    # value - confirm that this is intentional and not an editing artifact.
    broken_json = """{"task_description":"create ec2 on aws","calendar_file_content":"QkVHSU46VkNBTEVOREFSClZFUlNJT046Mi4wClBST0RJRDotLy9pY2FsLm1hcnVkb3QuY29tLy9pQ2FsIEV2ZW50IE1ha2VyCkNBTFNDQUxFOkdSRUdPUklBTgpCRUdJTjpWVElNRVpPTkUKVFpJRDpBZnJpY2EvTGFnb3MKTEFTVC1NT0RJRklFRDoyMDI0MDQyMlQwNTM0NTBaClRaVVJMOmh0dHBzOi8vd3d3LnR6dXJsLm9yZy96b25laW5mby1vdXRsb29rL0FmcmljYS9MYWdvcwpYLUxJQy1MT0NBVElPTjpBZnJpY2EvTGFnb3MKQkVHSU46U1RBTkRBUkQKVFpOQU1FOldBVApUWk9GRlNFVEZST006KzAxMDAKVFpPRkZTRVRUTzorMDEwMApEVFNUQVJUOjE5NzAwMTAxVDAwMDAwMApFTkQ6U1RBTkRBUkQKRU5EOlZUSU1FWk9ORQpCRUdJTjpWRVZFTlQKRFRTVEFNUDoyMDI1MDYyMFQxMzQxMjBaClVJRDpyZWN1ci1tZWV0aW5nLTJAbW9jawpEVFNUQVJUO1RaSUQ9QWZyaWNhL0xhZ29zOjIwMjUwNjAyVDE1MDAwMApEVEVORDtUWklEPUFmcmljYS9MYWdvczoyMDI1MDYwMlQxNjAwMDAKU1VNTUFSWTpQcm9qZWN0IFJldmlldwpFTkQ6VkVWRU5UCkJFR0lOO"{"task_description":"create ec2 on aws","calendar_file_content":"QkVHSU46VkNBTEVOREFSClZFUlNJT046Mi4wClBST0RJRDotLy9pY2FsLm1hcnVkb3QuY29tLy9pQ2FsIEV2ZW50IE1ha2VyCkNBTFNDQUxFOkdSRUdPUklBTgpCRUdJTjpWVElNRVpPTkUKVFpJRDpBZnJpY2EvTGFnb3MKTEFTVC1NT0RJRklFRDoyMDI0MDQyMlQwNTM0NTBaClRaVVJMOmh0dHBzOi8vd3d3LnR6dXJsLm9yZy96b25laW5mby1vdXRsb29rL0FmcmljYS9MYWdvcwpYLUxJQy1MT0NBVElPTjpBZnJpY2EvTGFnb3MKQkVHSU46U1RBTkRBUkQKVFpOQU1FOldBVApUWk9GRlNFVEZST006KzAxMDAKVFpPRkZTRVRUTzorMDEwMApEVFNUQVJUOjE5NzAwMTAxVDAwMDAwMApFTkQ6U1RBTkRBUkQKRU5EOlZUSU1FWk9ORQpCRUdJTjpWRVZFTlQKRFRTVEFNUDoyMDI1MDYyMFQxMzQxMjBaClVJRDpyZWN1ci1tZWV0aW5nLTFAbW9jawpEVFNUQVJUO1RaSUQ9QWZyaWNhL0xhZ29zOjIwMjUwNjAzVDEwMDAwMApEVEVORDtUWklEPUFmcmljYS9MYWdvczoyMDI1MDYwM1QxMTAwMDAKU1VNTUFSWTpUZWFtIFN5bmMKRU5EOlZFVkVOVApCRUdJTjpWRVZFTlQKRFRTVEFNUDoyMDI1MDYyMFQxMzQxMjBaClVJRDpzaW5nbGUtZXZlbnQtMUBtb2NrCkRUU1RBUlQ7VFpJRD1BZnJpY2EvTGFnb3M6MjAyNTA2MDVUMTQwMDAwCkRURU5EO1RaSUQ9QWZyaWNhL0xhZ29zOjIwMjUwNjA1VDE1MDAwMApTVU1NQVJZOkNsaWVudCBDYWxsCkVORDpWRVZFTlQKQkVHSU46VkVWRU5UCkRUU1RBTVA6MjAyNTA2MjBUMTM0MTIwWgpVSUQ6c2luZ2xlLWV2ZW50LTRAbW9jawpEVFNUQVJUO1RaSUQ9QWZyaWNhL0xhZ29zOjIwMjUwNjE2VDE2MDAwMApEVEVORDtUWklEPUFmcmljYS9MYWdvczoyMDI1MDYxNlQxNzA
wMDAKU1VNTUFSWTpXb3Jrc2hvcApFTkQ6VkVWRU5UCkJFR0lOOlZFVkVOVApEVFNUQU1QOjIwMjUwNjIwVDEzNDEyMFoKVUlEOnNpbmdsZS1ldmVudC0zQG1vY2sKRFRTVEFSVDtUWklEPUFmcmljYS9MYWdvczoyMDI1MDcwN1QxMTAwMDAKRFRFTkQ7VFpJRD1BZnJpY2EvTGFnb3M6MjAyNTA3MDdUMTIwMDAwClNVTU1BUlk6UGxhbm5pbmcgU2Vzc2lvbgpFTkQ6VkVWRU5UCkJFR0lOOlZFVkVOVApEVFNUQU1QOjIwMjUwNjIwVDEzNDEyMFoKVUlEOnNpbmdsZS1ldmVudC01QG1vY2sKRFRTVEFSVDtUWklEPUFmcmljYS9MYWdvczoyMDI1MDcyMlQwOTAwMDAKRFRFTkQ7VFpJRD1BZnJpY2EvTGFnb3M6MjAyNTA3MjJUMTAwMDAwClNVTU1BUlk6RGVtbwpFTkQ6VkVWRU5UCkVORDpWQ0FMRU5EQVI="}"""

    logger.debug(f"Broken JSON length: {len(broken_json)}")

    # Find the error position: the stray quote that cuts the first base64
    # value short, directly followed by the duplicate object. The character
    # before that quote is base64 text, so the search pattern must start at
    # the quote itself; str.find returns -1 when the pattern is absent.
    error_pos = broken_json.find('"{"task_description"')
    assert error_pos != -1, "Fixture should contain the duplicate JSON marker"
    logger.debug(f"Error position (duplicate JSON start): {error_pos}")

    # Try to parse the broken JSON first to confirm it fails
    json_parse_failed = False
    try:
        json.loads(broken_json)
        logger.error("❌ UNEXPECTED: Broken JSON parsed successfully!")
    except json.JSONDecodeError as e:
        json_parse_failed = True
        logger.info(f"✅ Expected JSON error at position {e.pos}: {e}")
        # Show a 40-character window around the decoder's failure point
        logger.debug(f"Error context: '{broken_json[max(0, e.pos-20):e.pos+20]}'")

    assert json_parse_failed, "Expected broken JSON to fail parsing"

    # Test the repair function
    assembler = ToolCallAssembler()
    repaired_json = assembler._attempt_json_repair(broken_json)

    assert repaired_json is not None, "Repair should return a result"

    logger.info(f"✅ Repair attempted, result length: {len(repaired_json)}")
    logger.debug(f"Repaired preview: {repaired_json[:200]}...")

    # Try to parse the repaired JSON; a JSONDecodeError here fails the test
    parsed = json.loads(repaired_json)
    logger.debug(f"Task description: {parsed.get('task_description', 'MISSING')}")
    logger.debug(
        f"Calendar content length: {len(parsed.get('calendar_file_content', ''))}"
    )

    # Verify expected fields exist
    assert "task_description" in parsed, "Repaired JSON should have task_description"
    assert (
        "calendar_file_content" in parsed
    ), "Repaired JSON should have calendar_file_content"
    assert (
        parsed["task_description"] == "create ec2 on aws"
    ), "Task description should match expected value"

    logger.pass_test("Repaired JSON parses correctly")


def test_simpler_corruption():
    """Baseline repair check: an object whose closing brace is missing.

    Feeds the truncated text to ``ToolCallAssembler._attempt_json_repair`` and
    asserts that the result parses and still carries ``task_description``.
    """

    logger.start_test("Testing simpler JSON corruption")

    # Payload that stops right before the final "}"
    truncated_payload = (
        '{"task_description":"test task","calendar_file_content":"base64data"'
    )

    fixed_text = ToolCallAssembler()._attempt_json_repair(truncated_payload)
    assert fixed_text is not None, "Simple repair should return a result"

    # The repaired text must be loadable JSON
    decoded = json.loads(fixed_text)

    assert (
        "task_description" in decoded
    ), "Simple repair should preserve task_description"
    assert decoded["task_description"] == "test task", "Task description should match"

    logger.pass_test("Simple repair works correctly")


def test_datetime_serialization():
    """Check that ``safe_json_dumps`` output for datetime-bearing data is valid JSON.

    Serializes a small structure containing ``datetime`` values, parses the
    result back with ``json.loads`` and asserts the top-level keys and the
    schedule length survive the round trip.
    """

    logger.start_test("Testing datetime serialization")

    # Imported inside the test, so an import failure is raised from this test
    from ui.pages.chat import safe_json_dumps
    from datetime import datetime

    # One schedule entry with start/end datetimes, plus a top-level timestamp
    schedule_entry = {
        "task": "Test Task",
        "start_time": datetime(2025, 6, 23, 10, 0),
        "end_time": datetime(2025, 6, 23, 11, 0),
    }
    payload = {"schedule": [schedule_entry], "timestamp": datetime.now()}

    serialized = safe_json_dumps(payload, indent=2)
    logger.debug(f"Sample output: {serialized[:200]}...")

    # Round-trip through the standard parser to prove the text is valid JSON
    round_tripped = json.loads(serialized)
    assert "schedule" in round_tripped, "Serialized result should have schedule"
    assert "timestamp" in round_tripped, "Serialized result should have timestamp"
    assert len(round_tripped["schedule"]) == 1, "Schedule should have one item"

    logger.pass_test("Datetime serialization works correctly")


def test_gradio_format():
    """Test that we're returning the correct format for Gradio messages.

    Builds the expected message list (the prior history plus one assistant
    reply) and validates that every entry is a dict with ``role`` and
    ``content`` keys and a role of ``user`` or ``assistant``.

    Note: this validates a locally constructed list only; no chat function is
    invoked here.
    """

    logger.start_test("Testing Gradio message format")

    # Simulate a proper messages format
    test_history = [
        {"role": "user", "content": "Hello"},
        {"role": "assistant", "content": "Hi there!"},
        {"role": "user", "content": "Create a schedule"},
    ]

    # This is what our function should return: the history followed by the
    # assistant's reply. Derived from test_history so the two cannot drift.
    expected_format = [
        *test_history,
        {"role": "assistant", "content": "Schedule created successfully!"},
    ]

    logger.debug("Expected format is list of dicts with 'role' and 'content' keys")

    # Validate the format
    for i, msg in enumerate(expected_format):
        assert isinstance(msg, dict), f"Message {i} should be a dict, got {type(msg)}"
        assert "role" in msg, f"Message {i} should have 'role' key"
        assert "content" in msg, f"Message {i} should have 'content' key"
        assert msg["role"] in [
            "user",
            "assistant",
        ], f"Message {i} has invalid role: {msg['role']}"

    logger.pass_test("Message format is correct for Gradio")


if __name__ == "__main__":
    logger.section("JSON Repair and Chat Functionality Tests")

    # Results tracker from the shared test utilities
    results = create_test_results(logger)

    # (label, test function) pairs, executed in the order listed
    suite = (
        ("streaming_error_repair", test_actual_streaming_error),
        ("simple_json_repair", test_simpler_corruption),
        ("datetime_serialization", test_datetime_serialization),
        ("gradio_message_format", test_gradio_format),
    )
    for label, test_fn in suite:
        results.run_test(label, test_fn)

    # Exit status is 0 only when the summary reports that everything passed
    all_passed = results.summary()
    if all_passed:
        sys.exit(0)
    sys.exit(1)