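"""Tests for streaming tool-call JSON repair, datetime serialization, and Gradio message formatting."""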
import json
import sys

# Add src to path to import our modules
sys.path.insert(0, "src")

from handlers.tool_call_handler import ToolCallAssembler

# Import standardized test utilities
from tests.test_utils import get_test_logger, create_test_results

# Initialize standardized test logger
logger = get_test_logger(__name__)


def test_actual_streaming_error():
    """Test the JSON repair functionality with the actual streaming error pattern"""
    logger.start_test("Testing JSON repair functionality with actual streaming error")

    # This is based on the actual error from the logs at character 787.
    # The pattern shows base64 data ending abruptly with a quote and then a duplicate JSON object.
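    # Schematic shape of the corruption (values shortened here for illustration only):
    #   {"task_description":"...","calendar_file_content":"<truncated base64>"{"task_description":"...","calendar_file_content":"<full base64>"}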
    broken_json = """{"task_description":"create ec2 on aws","calendar_file_content":"QkVHSU46VkNBTEVOREFSClZFUlNJT046Mi4wClBST0RJRDotLy9pY2FsLm1hcnVkb3QuY29tLy9pQ2FsIEV2ZW50IE1ha2VyCkNBTFNDQUxFOkdSRUdPUklBTgpCRUdJTjpWVElNRVpPTkUKVFpJRDpBZnJpY2EvTGFnb3MKTEFTVC1NT0RJRklFRDoyMDI0MDQyMlQwNTM0NTBaClRaVVJMOmh0dHBzOi8vd3d3LnR6dXJsLm9yZy96b25laW5mby1vdXRsb29rL0FmcmljYS9MYWdvcwpYLUxJQy1MT0NBVElPTjpBZnJpY2EvTGFnb3MKQkVHSU46U1RBTkRBUkQKVFpOQU1FOldBVApUWk9GRlNFVEZST006KzAxMDAKVFpPRkZTRVRUTzorMDEwMApEVFNUQVJUOjE5NzAwMTAxVDAwMDAwMApFTkQ6U1RBTkRBUkQKRU5EOlZUSU1FWk9ORQpCRUdJTjpWRVZFTlQKRFRTVEFNUDoyMDI1MDYyMFQxMzQxMjBaClVJRDpyZWN1ci1tZWV0aW5nLTJAbW9jawpEVFNUQVJUO1RaSUQ9QWZyaWNhL0xhZ29zOjIwMjUwNjAyVDE1MDAwMApEVEVORDtUWklEPUFmcmljYS9MYWdvczoyMDI1MDYwMlQxNjAwMDAKU1VNTUFSWTpQcm9qZWN0IFJldmlldwpFTkQ6VkVWRU5UCkJFR0lOO"{"task_description":"create ec2 on aws","calendar_file_content":"QkVHSU46VkNBTEVOREFSClZFUlNJT046Mi4wClBST0RJRDotLy9pY2FsLm1hcnVkb3QuY29tLy9pQ2FsIEV2ZW50IE1ha2VyCkNBTFNDQUxFOkdSRUdPUklBTgpCRUdJTjpWVElNRVpPTkUKVFpJRDpBZnJpY2EvTGFnb3MKTEFTVC1NT0RJRklFRDoyMDI0MDQyMlQwNTM0NTBaClRaVVJMOmh0dHBzOi8vd3d3LnR6dXJsLm9yZy96b25laW5mby1vdXRsb29rL0FmcmljYS9MYWdvcwpYLUxJQy1MT0NBVElPTjpBZnJpY2EvTGFnb3MKQkVHSU46U1RBTkRBUkQKVFpOQU1FOldBVApUWk9GRlNFVEZST006KzAxMDAKVFpPRkZTRVRUTzorMDEwMApEVFNUQVJUOjE5NzAwMTAxVDAwMDAwMApFTkQ6U1RBTkRBUkQKRU5EOlZUSU1FWk9ORQpCRUdJTjpWRVZFTlQKRFRTVEFNUDoyMDI1MDYyMFQxMzQxMjBaClVJRDpyZWN1ci1tZWV0aW5nLTFAbW9jawpEVFNUQVJUO1RaSUQ9QWZyaWNhL0xhZ29zOjIwMjUwNjAzVDEwMDAwMApEVEVORDtUWklEPUFmcmljYS9MYWdvczoyMDI1MDYwM1QxMTAwMDAKU1VNTUFSWTpUZWFtIFN5bmMKRU5EOlZFVkVOVApCRUdJTjpWRVZFTlQKRFRTVEFNUDoyMDI1MDYyMFQxMzQxMjBaClVJRDpzaW5nbGUtZXZlbnQtMUBtb2NrCkRUU1RBUlQ7VFpJRD1BZnJpY2EvTGFnb3M6MjAyNTA2MDVUMTQwMDAwCkRURU5EO1RaSUQ9QWZyaWNhL0xhZ29zOjIwMjUwNjA1VDE1MDAwMApTVU1NQVJZOkNsaWVudCBDYWxsCkVORDpWRVZFTlQKQkVHSU46VkVWRU5UCkRUU1RBTVA6MjAyNTA2MjBUMTM0MTIwWgpVSUQ6c2luZ2xlLWV2ZW50LTRAbW9jawpEVFNUQVJUO1RaSUQ9QWZyaWNhL0xhZ29zOjIwMjUwNjE2VDE2MDAwMApEVEVORDtUWklEPUFmcmljYS9MYWdvczoyMDI1MDYxNlQxNzAwMDAKU1VNTUFSWTpXb3Jrc2hvcApFTkQ6VkVWRU5UCkJFR0lOOlZFVkVOVApEVFNUQU1QOjIwMjUwNjIwVDEzNDEyMFoKVUlEOnNpbmdsZS1ldmVudC0zQG1vY2sKRFRTVEFSVDtUWklEPUFmcmljYS9MYWdvczoyMDI1MDcwN1QxMTAwMDAKRFRFTkQ7VFpJRD1BZnJpY2EvTGFnb3M6MjAyNTA3MDdUMTIwMDAwClNVTU1BUlk6UGxhbm5pbmcgU2Vzc2lvbgpFTkQ6VkVWRU5UCkJFR0lOOlZFVkVOVApEVFNUQU1QOjIwMjUwNjIwVDEzNDEyMFoKVUlEOnNpbmdsZS1ldmVudC01QG1vY2sKRFRTVEFSVDtUWklEPUFmcmljYS9MYWdvczoyMDI1MDcyMlQwOTAwMDAKRFRFTkQ7VFpJRD1BZnJpY2EvTGFnb3M6MjAyNTA3MjJUMTAwMDAwClNVTU1BUlk6RGVtbwpFTkQ6VkVWRU5UCkVORDpWQ0FMRU5EQVI="}"""
logger.debug(f"Broken JSON length: {len(broken_json)}") | |
# Find the error position (where the duplicate starts) | |
error_pos = broken_json.find(';"{"task_description"') | |
logger.debug(f"Error position (duplicate JSON start): {error_pos}") | |
# Try to parse the broken JSON first to confirm it fails | |
json_parse_failed = False | |
try: | |
json.loads(broken_json) | |
logger.error("β UNEXPECTED: Broken JSON parsed successfully!") | |
except json.JSONDecodeError as e: | |
json_parse_failed = True | |
logger.info(f"β Expected JSON error at position {e.pos}: {e}") | |
logger.debug(f"Error context: '{broken_json[max(0, e.pos-20):e.pos+20]}'") | |

    assert json_parse_failed, "Expected broken JSON to fail parsing"

    # Test the repair function
    assembler = ToolCallAssembler()
    repaired_json = assembler._attempt_json_repair(broken_json)
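    # Note: _attempt_json_repair is a private helper on ToolCallAssembler, exercised
    # directly here; the assertions below only check that its output parses and
    # preserves the original fields.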
    assert repaired_json is not None, "Repair should return a result"
    logger.info(f"Repair attempted, result length: {len(repaired_json)}")
    logger.debug(f"Repaired preview: {repaired_json[:200]}...")

    # Try to parse the repaired JSON
    parsed = json.loads(repaired_json)
    logger.debug(f"Task description: {parsed.get('task_description', 'MISSING')}")
    logger.debug(
        f"Calendar content length: {len(parsed.get('calendar_file_content', ''))}"
    )

    # Verify expected fields exist
    assert "task_description" in parsed, "Repaired JSON should have task_description"
    assert (
        "calendar_file_content" in parsed
    ), "Repaired JSON should have calendar_file_content"
    assert (
        parsed["task_description"] == "create ec2 on aws"
    ), "Task description should match expected value"

    logger.pass_test("Repaired JSON parses correctly")


def test_simpler_corruption():
    """Test a simpler case of JSON corruption for baseline functionality"""
    logger.start_test("Testing simpler JSON corruption")

    # Missing closing brace
    simple_broken = (
        '{"task_description":"test task","calendar_file_content":"base64data"'
    )
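    # A successful repair would presumably just append the missing brace, e.g.:
    #   {"task_description":"test task","calendar_file_content":"base64data"}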

    assembler = ToolCallAssembler()
    repaired = assembler._attempt_json_repair(simple_broken)

    assert repaired is not None, "Simple repair should return a result"

    # Try to parse the repaired JSON
    parsed = json.loads(repaired)
    assert (
        "task_description" in parsed
    ), "Simple repair should preserve task_description"
    assert parsed["task_description"] == "test task", "Task description should match"

    logger.pass_test("Simple repair works correctly")


def test_datetime_serialization():
    """Test our datetime serialization fixes"""
    logger.start_test("Testing datetime serialization")

    # Import our safe serialization function
    from ui.pages.chat import safe_json_dumps
    from datetime import datetime
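    # Plain json.dumps raises TypeError on datetime objects; safe_json_dumps is assumed
    # to convert them to JSON-safe strings (e.g. ISO format) so the result round-trips.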
    test_data = {
        "schedule": [
            {
                "task": "Test Task",
                "start_time": datetime(2025, 6, 23, 10, 0),
                "end_time": datetime(2025, 6, 23, 11, 0),
            }
        ],
        "timestamp": datetime.now(),
    }

    result = safe_json_dumps(test_data, indent=2)
    logger.debug(f"Sample output: {result[:200]}...")

    # Verify it's valid JSON
    parsed_back = json.loads(result)
    assert "schedule" in parsed_back, "Serialized result should have schedule"
    assert "timestamp" in parsed_back, "Serialized result should have timestamp"
    assert len(parsed_back["schedule"]) == 1, "Schedule should have one item"

    logger.pass_test("Datetime serialization works correctly")


def test_gradio_format():
    """Test that we're returning the correct format for Gradio messages"""
    logger.start_test("Testing Gradio message format")

    # Simulate a proper messages format
    test_history = [
        {"role": "user", "content": "Hello"},
        {"role": "assistant", "content": "Hi there!"},
        {"role": "user", "content": "Create a schedule"},
    ]
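    # Gradio's Chatbot in "messages" mode expects exactly this shape: a list of dicts
    # with "role" and "content" keys, which is what the checks below enforce.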

    # This is what our function should return
    expected_format = [
        {"role": "user", "content": "Hello"},
        {"role": "assistant", "content": "Hi there!"},
        {"role": "user", "content": "Create a schedule"},
        {"role": "assistant", "content": "Schedule created successfully!"},
    ]

    logger.debug("Expected format is list of dicts with 'role' and 'content' keys")

    # Validate the format
    for i, msg in enumerate(expected_format):
        assert isinstance(msg, dict), f"Message {i} should be a dict, got {type(msg)}"
        assert "role" in msg, f"Message {i} should have 'role' key"
        assert "content" in msg, f"Message {i} should have 'content' key"
        assert msg["role"] in [
            "user",
            "assistant",
        ], f"Message {i} has invalid role: {msg['role']}"

    logger.pass_test("Message format is correct for Gradio")


if __name__ == "__main__":
    logger.section("JSON Repair and Chat Functionality Tests")

    # Create test results tracker
    results = create_test_results(logger)

    # Run tests using the standardized approach
    results.run_test("streaming_error_repair", test_actual_streaming_error)
    results.run_test("simple_json_repair", test_simpler_corruption)
    results.run_test("datetime_serialization", test_datetime_serialization)
    results.run_test("gradio_message_format", test_gradio_format)

    # Generate summary and exit with appropriate code
    all_passed = results.summary()
    sys.exit(0 if all_passed else 1)