Spaces:

blackopsrepl
/

yuga-planner

Paused

File size: 8,747 Bytes

e2685f7

import json
import sys


# Add src to path to import our modules.
# "src" is a relative entry, so it is resolved against the current working
# directory; inserting at index 0 gives it priority over the other sys.path
# entries.
sys.path.insert(0, "src")

# Must stay below the sys.path change above
# (presumably `handlers` lives under src/ — TODO confirm).
from handlers.tool_call_handler import ToolCallAssembler

# Import standardized test utilities
from tests.test_utils import get_test_logger, create_test_results

# Initialize standardized test logger (module-level, used by every test below)
logger = get_test_logger(__name__)


def test_actual_streaming_error():
    """Test the JSON repair functionality with the actual streaming error pattern.

    The fixture is a tool-call argument string in which the first object's
    ``calendar_file_content`` value ends abruptly with a quote and a second
    JSON object with the same keys follows immediately. The test:

    1. confirms that ``json.loads`` rejects the raw string,
    2. passes it through ``ToolCallAssembler._attempt_json_repair``,
    3. checks that the repaired text parses and contains the expected
       ``task_description`` and a ``calendar_file_content`` key.

    Raises:
        AssertionError: if the fixture unexpectedly parses, the repair returns
            ``None``, or the repaired object lacks the expected fields.
        json.JSONDecodeError: if the repaired text is still not valid JSON.
    """

    logger.start_test("Testing JSON repair functionality with actual streaming error")

    # This is based on the actual error from the logs at character 787
    # The pattern shows base64 data ending abruptly with a quote and then a duplicate JSON object
    broken_json = """{"task_description":"create ec2 on aws","calendar_file_content":"QkVHSU46VkNBTEVOREFSClZFUlNJT046Mi4wClBST0RJRDotLy9pY2FsLm1hcnVkb3QuY29tLy9pQ2FsIEV2ZW50IE1ha2VyCkNBTFNDQUxFOkdSRUdPUklBTgpCRUdJTjpWVElNRVpPTkUKVFpJRDpBZnJpY2EvTGFnb3MKTEFTVC1NT0RJRklFRDoyMDI0MDQyMlQwNTM0NTBaClRaVVJMOmh0dHBzOi8vd3d3LnR6dXJsLm9yZy96b25laW5mby1vdXRsb29rL0FmcmljYS9MYWdvcwpYLUxJQy1MT0NBVElPTjpBZnJpY2EvTGFnb3MKQkVHSU46U1RBTkRBUkQKVFpOQU1FOldBVApUWk9GRlNFVEZST006KzAxMDAKVFpPRkZTRVRUTzorMDEwMApEVFNUQVJUOjE5NzAwMTAxVDAwMDAwMApFTkQ6U1RBTkRBUkQKRU5EOlZUSU1FWk9ORQpCRUdJTjpWRVZFTlQKRFRTVEFNUDoyMDI1MDYyMFQxMzQxMjBaClVJRDpyZWN1ci1tZWV0aW5nLTJAbW9jawpEVFNUQVJUO1RaSUQ9QWZyaWNhL0xhZ29zOjIwMjUwNjAyVDE1MDAwMApEVEVORDtUWklEPUFmcmljYS9MYWdvczoyMDI1MDYwMlQxNjAwMDAKU1VNTUFSWTpQcm9qZWN0IFJldmlldwpFTkQ6VkVWRU5UCkJFR0lOO"{"task_description":"create ec2 on aws","calendar_file_content":"QkVHSU46VkNBTEVOREFSClZFUlNJT046Mi4wClBST0RJRDotLy9pY2FsLm1hcnVkb3QuY29tLy9pQ2FsIEV2ZW50IE1ha2VyCkNBTFNDQUxFOkdSRUdPUklBTgpCRUdJTjpWVElNRVpPTkUKVFpJRDpBZnJpY2EvTGFnb3MKTEFTVC1NT0RJRklFRDoyMDI0MDQyMlQwNTM0NTBaClRaVVJMOmh0dHBzOi8vd3d3LnR6dXJsLm9yZy96b25laW5mby1vdXRsb29rL0FmcmljYS9MYWdvcwpYLUxJQy1MT0NBVElPTjpBZnJpY2EvTGFnb3MKQkVHSU46U1RBTkRBUkQKVFpOQU1FOldBVApUWk9GRlNFVEZST006KzAxMDAKVFpPRkZTRVRUTzorMDEwMApEVFNUQVJUOjE5NzAwMTAxVDAwMDAwMApFTkQ6U1RBTkRBUkQKRU5EOlZUSU1FWk9ORQpCRUdJTjpWRVZFTlQKRFRTVEFNUDoyMDI1MDYyMFQxMzQxMjBaClVJRDpyZWN1ci1tZWV0aW5nLTFAbW9jawpEVFNUQVJUO1RaSUQ9QWZyaWNhL0xhZ29zOjIwMjUwNjAzVDEwMDAwMApEVEVORDtUWklEPUFmcmljYS9MYWdvczoyMDI1MDYwM1QxMTAwMDAKU1VNTUFSWTpUZWFtIFN5bmMKRU5EOlZFVkVOVApCRUdJTjpWRVZFTlQKRFRTVEFNUDoyMDI1MDYyMFQxMzQxMjBaClVJRDpzaW5nbGUtZXZlbnQtMUBtb2NrCkRUU1RBUlQ7VFpJRD1BZnJpY2EvTGFnb3M6MjAyNTA2MDVUMTQwMDAwCkRURU5EO1RaSUQ9QWZyaWNhL0xhZ29zOjIwMjUwNjA1VDE1MDAwMApTVU1NQVJZOkNsaWVudCBDYWxsCkVORDpWRVZFTlQKQkVHSU46VkVWRU5UCkRUU1RBTVA6MjAyNTA2MjBUMTM0MTIwWgpVSUQ6c2luZ2xlLWV2ZW50LTRAbW9jawpEVFNUQVJUO1RaSUQ9QWZyaWNhL0xhZ29zOjIwMjUwNjE2VDE2MDAwMApEVEVORDtUWklEPUFmcmljYS9MYWdvczoyMDI1MDYxNlQxNzA
wMDAKU1VNTUFSWTpXb3Jrc2hvcApFTkQ6VkVWRU5UCkJFR0lOOlZFVkVOVApEVFNUQU1QOjIwMjUwNjIwVDEzNDEyMFoKVUlEOnNpbmdsZS1ldmVudC0zQG1vY2sKRFRTVEFSVDtUWklEPUFmcmljYS9MYWdvczoyMDI1MDcwN1QxMTAwMDAKRFRFTkQ7VFpJRD1BZnJpY2EvTGFnb3M6MjAyNTA3MDdUMTIwMDAwClNVTU1BUlk6UGxhbm5pbmcgU2Vzc2lvbgpFTkQ6VkVWRU5UCkJFR0lOOlZFVkVOVApEVFNUQU1QOjIwMjUwNjIwVDEzNDEyMFoKVUlEOnNpbmdsZS1ldmVudC01QG1vY2sKRFRTVEFSVDtUWklEPUFmcmljYS9MYWdvczoyMDI1MDcyMlQwOTAwMDAKRFRFTkQ7VFpJRD1BZnJpY2EvTGFnb3M6MjAyNTA3MjJUMTAwMDAwClNVTU1BUlk6RGVtbwpFTkQ6VkVWRU5UCkVORDpWQ0FMRU5EQVI="}"""

    logger.debug(f"Broken JSON length: {len(broken_json)}")

    # Find the error position (where the duplicate starts).
    # The legitimate object opens at index 0, so searching from index 1 lands on
    # the second, duplicated opening. The previous pattern started with ';',
    # which never occurs in the fixture, so the lookup always returned -1.
    object_opening = '{"task_description"'
    error_pos = broken_json.find(object_opening, 1)
    assert error_pos > 0, "Fixture should contain a duplicated JSON object"
    logger.debug(f"Error position (duplicate JSON start): {error_pos}")

    # Try to parse the broken JSON first to confirm it fails
    try:
        json.loads(broken_json)
    except json.JSONDecodeError as e:
        logger.info(f"✅ Expected JSON error at position {e.pos}: {e}")
        logger.debug(f"Error context: '{broken_json[max(0, e.pos-20):e.pos+20]}'")
    else:
        # Parsing succeeded, so the fixture no longer reproduces the failure
        logger.error("❌ UNEXPECTED: Broken JSON parsed successfully!")
        raise AssertionError("Expected broken JSON to fail parsing")

    # Test the repair function
    assembler = ToolCallAssembler()
    repaired_json = assembler._attempt_json_repair(broken_json)

    assert repaired_json is not None, "Repair should return a result"

    logger.info(f"✅ Repair attempted, result length: {len(repaired_json)}")
    logger.debug(f"Repaired preview: {repaired_json[:200]}...")

    # Try to parse the repaired JSON (raises if it is still invalid)
    parsed = json.loads(repaired_json)
    logger.debug(f"Task description: {parsed.get('task_description', 'MISSING')}")
    logger.debug(
        f"Calendar content length: {len(parsed.get('calendar_file_content', ''))}"
    )

    # Verify expected fields exist
    assert "task_description" in parsed, "Repaired JSON should have task_description"
    assert (
        "calendar_file_content" in parsed
    ), "Repaired JSON should have calendar_file_content"
    assert (
        parsed["task_description"] == "create ec2 on aws"
    ), "Task description should match expected value"

    logger.pass_test("Repaired JSON parses correctly")


def test_simpler_corruption():
    """Baseline repair check: a JSON object that only lacks its closing brace"""

    logger.start_test("Testing simpler JSON corruption")

    # Truncated payload: the object is never closed with "}"
    truncated_payload = (
        '{"task_description":"test task",'
        '"calendar_file_content":"base64data"'
    )

    fixed_payload = ToolCallAssembler()._attempt_json_repair(truncated_payload)
    assert fixed_payload is not None, "Simple repair should return a result"

    # json.loads raises if the repaired text is still not valid JSON
    decoded = json.loads(fixed_payload)

    has_description = "task_description" in decoded
    assert has_description, "Simple repair should preserve task_description"

    description = decoded["task_description"]
    assert description == "test task", "Task description should match"

    logger.pass_test("Simple repair works correctly")


def test_datetime_serialization():
    """Check that safe_json_dumps output for datetime-bearing data parses as JSON"""

    logger.start_test("Testing datetime serialization")

    # Function-scope imports: the serializer under test and the datetime type
    from ui.pages.chat import safe_json_dumps
    from datetime import datetime

    # One schedule entry with datetime start/end values
    slot = {
        "task": "Test Task",
        "start_time": datetime(2025, 6, 23, 10, 0),
        "end_time": datetime(2025, 6, 23, 11, 0),
    }
    payload = {"schedule": [slot], "timestamp": datetime.now()}

    serialized = safe_json_dumps(payload, indent=2)
    logger.debug(f"Sample output: {serialized[:200]}...")

    # Round-trip through json.loads to prove the output is valid JSON
    round_tripped = json.loads(serialized)

    assert "schedule" in round_tripped, "Serialized result should have schedule"
    assert "timestamp" in round_tripped, "Serialized result should have timestamp"

    entry_count = len(round_tripped["schedule"])
    assert entry_count == 1, "Schedule should have one item"

    logger.pass_test("Datetime serialization works correctly")


def test_gradio_format():
    """Validate the list-of-dicts message shape we return for Gradio messages.

    Only a hard-coded sample conversation is checked here; no application
    function is called.
    """

    logger.start_test("Testing Gradio message format")

    # Simulate a proper messages format
    test_history = [
        {"role": "user", "content": "Hello"},
        {"role": "assistant", "content": "Hi there!"},
        {"role": "user", "content": "Create a schedule"},
    ]

    # This is what our function should return: the history plus one
    # assistant reply appended at the end
    expected_format = [
        *test_history,
        {"role": "assistant", "content": "Schedule created successfully!"},
    ]

    logger.debug("Expected format is list of dicts with 'role' and 'content' keys")

    # Validate the format
    allowed_roles = ["user", "assistant"]
    for i, msg in enumerate(expected_format):
        assert isinstance(msg, dict), f"Message {i} should be a dict, got {type(msg)}"
        assert "role" in msg, f"Message {i} should have 'role' key"
        assert "content" in msg, f"Message {i} should have 'content' key"

        role = msg["role"]
        assert role in allowed_roles, f"Message {i} has invalid role: {role}"

    logger.pass_test("Message format is correct for Gradio")


if __name__ == "__main__":
    logger.section("JSON Repair and Chat Functionality Tests")

    # Create test results tracker
    results = create_test_results(logger)

    # (label, test function) pairs, run in the order listed
    suite = (
        ("streaming_error_repair", test_actual_streaming_error),
        ("simple_json_repair", test_simpler_corruption),
        ("datetime_serialization", test_datetime_serialization),
        ("gradio_message_format", test_gradio_format),
    )
    for label, test_fn in suite:
        results.run_test(label, test_fn)

    # Exit status 0 when summary() reports success, 1 otherwise
    exit_code = 0 if results.summary() else 1
    sys.exit(exit_code)