import json import sys # Add src to path to import our modules sys.path.insert(0, "src") from handlers.tool_call_handler import ToolCallAssembler # Import standardized test utilities from tests.test_utils import get_test_logger, create_test_results # Initialize standardized test logger logger = get_test_logger(__name__) def test_actual_streaming_error(): """Test the JSON repair functionality with the actual streaming error pattern""" logger.start_test("Testing JSON repair functionality with actual streaming error") # This is based on the actual error from the logs at character 787 # The pattern shows base64 data ending abruptly with a quote and then a duplicate JSON object broken_json = """{"task_description":"create ec2 on aws","calendar_file_content":"QkVHSU46VkNBTEVOREFSClZFUlNJT046Mi4wClBST0RJRDotLy9pY2FsLm1hcnVkb3QuY29tLy9pQ2FsIEV2ZW50IE1ha2VyCkNBTFNDQUxFOkdSRUdPUklBTgpCRUdJTjpWVElNRVpPTkUKVFpJRDpBZnJpY2EvTGFnb3MKTEFTVC1NT0RJRklFRDoyMDI0MDQyMlQwNTM0NTBaClRaVVJMOmh0dHBzOi8vd3d3LnR6dXJsLm9yZy96b25laW5mby1vdXRsb29rL0FmcmljYS9MYWdvcwpYLUxJQy1MT0NBVElPTjpBZnJpY2EvTGFnb3MKQkVHSU46U1RBTkRBUkQKVFpOQU1FOldBVApUWk9GRlNFVEZST006KzAxMDAKVFpPRkZTRVRUTzorMDEwMApEVFNUQVJUOjE5NzAwMTAxVDAwMDAwMApFTkQ6U1RBTkRBUkQKRU5EOlZUSU1FWk9ORQpCRUdJTjpWRVZFTlQKRFRTVEFNUDoyMDI1MDYyMFQxMzQxMjBaClVJRDpyZWN1ci1tZWV0aW5nLTJAbW9jawpEVFNUQVJUO1RaSUQ9QWZyaWNhL0xhZ29zOjIwMjUwNjAyVDE1MDAwMApEVEVORDtUWklEPUFmcmljYS9MYWdvczoyMDI1MDYwMlQxNjAwMDAKU1VNTUFSWTpQcm9qZWN0IFJldmlldwpFTkQ6VkVWRU5UCkJFR0lOO"{"task_description":"create ec2 on aws","calendar_file_content":"QkVHSU46VkNBTEVOREFSClZFUlNJT046Mi4wClBST0RJRDotLy9pY2FsLm1hcnVkb3QuY29tLy9pQ2FsIEV2ZW50IE1ha2VyCkNBTFNDQUxFOkdSRUdPUklBTgpCRUdJTjpWVElNRVpPTkUKVFpJRDpBZnJpY2EvTGFnb3MKTEFTVC1NT0RJRklFRDoyMDI0MDQyMlQwNTM0NTBaClRaVVJMOmh0dHBzOi8vd3d3LnR6dXJsLm9yZy96b25laW5mby1vdXRsb29rL0FmcmljYS9MYWdvcwpYLUxJQy1MT0NBVElPTjpBZnJpY2EvTGFnb3MKQkVHSU46U1RBTkRBUkQKVFpOQU1FOldBVApUWk9GRlNFVEZST006KzAxMDAKVFpPRkZTRVRUTzorMDEwMApEVFNUQVJUOjE5NzAwMTAxVDAwMDAwMApFTkQ6U1RBTkRBUkQKRU5EOlZUSU1FWk9ORQpCRUdJTjpWRVZFTlQKRFRTVEFNUDoyMDI1MDYyMFQxMzQxMjBaClVJRDpyZWN1ci1tZWV0aW5nLTFAbW9jawpEVFNUQVJUO1RaSUQ9QWZyaWNhL0xhZ29zOjIwMjUwNjAzVDEwMDAwMApEVEVORDtUWklEPUFmcmljYS9MYWdvczoyMDI1MDYwM1QxMTAwMDAKU1VNTUFSWTpUZWFtIFN5bmMKRU5EOlZFVkVOVApCRUdJTjpWRVZFTlQKRFRTVEFNUDoyMDI1MDYyMFQxMzQxMjBaClVJRDpzaW5nbGUtZXZlbnQtMUBtb2NrCkRUU1RBUlQ7VFpJRD1BZnJpY2EvTGFnb3M6MjAyNTA2MDVUMTQwMDAwCkRURU5EO1RaSUQ9QWZyaWNhL0xhZ29zOjIwMjUwNjA1VDE1MDAwMApTVU1NQVJZOkNsaWVudCBDYWxsCkVORDpWRVZFTlQKQkVHSU46VkVWRU5UCkRUU1RBTVA6MjAyNTA2MjBUMTM0MTIwWgpVSUQ6c2luZ2xlLWV2ZW50LTRAbW9jawpEVFNUQVJUO1RaSUQ9QWZyaWNhL0xhZ29zOjIwMjUwNjE2VDE2MDAwMApEVEVORDtUWklEPUFmcmljYS9MYWdvczoyMDI1MDYxNlQxNzAwMDAKU1VNTUFSWTpXb3Jrc2hvcApFTkQ6VkVWRU5UCkJFR0lOOlZFVkVOVApEVFNUQU1QOjIwMjUwNjIwVDEzNDEyMFoKVUlEOnNpbmdsZS1ldmVudC0zQG1vY2sKRFRTVEFSVDtUWklEPUFmcmljYS9MYWdvczoyMDI1MDcwN1QxMTAwMDAKRFRFTkQ7VFpJRD1BZnJpY2EvTGFnb3M6MjAyNTA3MDdUMTIwMDAwClNVTU1BUlk6UGxhbm5pbmcgU2Vzc2lvbgpFTkQ6VkVWRU5UCkJFR0lOOlZFVkVOVApEVFNUQU1QOjIwMjUwNjIwVDEzNDEyMFoKVUlEOnNpbmdsZS1ldmVudC01QG1vY2sKRFRTVEFSVDtUWklEPUFmcmljYS9MYWdvczoyMDI1MDcyMlQwOTAwMDAKRFRFTkQ7VFpJRD1BZnJpY2EvTGFnb3M6MjAyNTA3MjJUMTAwMDAwClNVTU1BUlk6RGVtbwpFTkQ6VkVWRU5UCkVORDpWQ0FMRU5EQVI="}""" logger.debug(f"Broken JSON length: {len(broken_json)}") # Find the error position (where the duplicate starts) error_pos = broken_json.find(';"{"task_description"') logger.debug(f"Error position (duplicate JSON start): {error_pos}") # Try to parse the broken JSON first to confirm it fails json_parse_failed = False try: json.loads(broken_json) logger.error("❌ UNEXPECTED: Broken JSON parsed successfully!") except json.JSONDecodeError as e: json_parse_failed = True logger.info(f"✅ Expected JSON error at position {e.pos}: {e}") logger.debug(f"Error context: '{broken_json[max(0, e.pos-20):e.pos+20]}'") assert json_parse_failed, "Expected broken JSON to fail parsing" # Test the repair function assembler = ToolCallAssembler() repaired_json = assembler._attempt_json_repair(broken_json) assert repaired_json is not None, "Repair should return a result" logger.info(f"✅ Repair attempted, result length: {len(repaired_json)}") logger.debug(f"Repaired preview: {repaired_json[:200]}...") # Try to parse the repaired JSON parsed = json.loads(repaired_json) logger.debug(f"Task description: {parsed.get('task_description', 'MISSING')}") logger.debug( f"Calendar content length: {len(parsed.get('calendar_file_content', ''))}" ) # Verify expected fields exist assert "task_description" in parsed, "Repaired JSON should have task_description" assert ( "calendar_file_content" in parsed ), "Repaired JSON should have calendar_file_content" assert ( parsed["task_description"] == "create ec2 on aws" ), "Task description should match expected value" logger.pass_test("Repaired JSON parses correctly") def test_simpler_corruption(): """Test a simpler case of JSON corruption for baseline functionality""" logger.start_test("Testing simpler JSON corruption") # Missing closing brace simple_broken = ( '{"task_description":"test task","calendar_file_content":"base64data"' ) assembler = ToolCallAssembler() repaired = assembler._attempt_json_repair(simple_broken) assert repaired is not None, "Simple repair should return a result" # Try to parse the repaired JSON parsed = json.loads(repaired) assert ( "task_description" in parsed ), "Simple repair should preserve task_description" assert parsed["task_description"] == "test task", "Task description should match" logger.pass_test("Simple repair works correctly") def test_datetime_serialization(): """Test our datetime serialization fixes""" logger.start_test("Testing datetime serialization") # Import our safe serialization function from ui.pages.chat import safe_json_dumps from datetime import datetime test_data = { "schedule": [ { "task": "Test Task", "start_time": datetime(2025, 6, 23, 10, 0), "end_time": datetime(2025, 6, 23, 11, 0), } ], "timestamp": datetime.now(), } result = safe_json_dumps(test_data, indent=2) logger.debug(f"Sample output: {result[:200]}...") # Verify it's valid JSON parsed_back = json.loads(result) assert "schedule" in parsed_back, "Serialized result should have schedule" assert "timestamp" in parsed_back, "Serialized result should have timestamp" assert len(parsed_back["schedule"]) == 1, "Schedule should have one item" logger.pass_test("Datetime serialization works correctly") def test_gradio_format(): """Test that we're returning the correct format for Gradio messages""" logger.start_test("Testing Gradio message format") # Simulate a proper messages format test_history = [ {"role": "user", "content": "Hello"}, {"role": "assistant", "content": "Hi there!"}, {"role": "user", "content": "Create a schedule"}, ] # This is what our function should return expected_format = [ {"role": "user", "content": "Hello"}, {"role": "assistant", "content": "Hi there!"}, {"role": "user", "content": "Create a schedule"}, {"role": "assistant", "content": "Schedule created successfully!"}, ] logger.debug("Expected format is list of dicts with 'role' and 'content' keys") # Validate the format for i, msg in enumerate(expected_format): assert isinstance(msg, dict), f"Message {i} should be a dict, got {type(msg)}" assert "role" in msg, f"Message {i} should have 'role' key" assert "content" in msg, f"Message {i} should have 'content' key" assert msg["role"] in [ "user", "assistant", ], f"Message {i} has invalid role: {msg['role']}" logger.pass_test("Message format is correct for Gradio") if __name__ == "__main__": logger.section("JSON Repair and Chat Functionality Tests") # Create test results tracker results = create_test_results(logger) # Run tests using the standardized approach results.run_test("streaming_error_repair", test_actual_streaming_error) results.run_test("simple_json_repair", test_simpler_corruption) results.run_test("datetime_serialization", test_datetime_serialization) results.run_test("gradio_message_format", test_gradio_format) # Generate summary and exit with appropriate code all_passed = results.summary() sys.exit(0 if all_passed else 1)