#!/usr/bin/env python3
"""
Test script: Verify that the run_evaluation function works correctly
"""
import asyncio
import os
import sys
from pathlib import Path
from dotenv import load_dotenv
import argparse
from mmengine import DictAction
# Load environment variables
load_dotenv(verbose=True)

# Set the project root path
root = str(Path(__file__).resolve().parents[1])
sys.path.append(root)
from src.database import db
from src.logger import logger
from src.config import config
from src.agents.evaluator import run_evaluation
def parse_args():
    """Parse command line arguments"""
    parser = argparse.ArgumentParser(description='main')
    parser.add_argument("--config", default=os.path.join(root, "configs", "paper_agent.py"), help="config file path")
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        'Note that the quotation marks are necessary and that no white space '
        'is allowed.')
    args = parser.parse_args()
    return args
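# Example of the --cfg-options format accepted by mmengine's DictAction (the keys
# shown are hypothetical, purely to illustrate the xxx=yyy syntax):
#   python test_evaluation.py --cfg-options log_level=DEBUG tags="[ai,agents]"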
async def test_evaluation():
    """Test evaluation functionality"""
    print("=== Starting Evaluation Test ===")

    # Test parameters
    test_arxiv_id = "2508.09889"  # Use existing paper in database
    test_pdf_url = f"https://arxiv.org/pdf/{test_arxiv_id}.pdf"
    print(f"Test paper ID: {test_arxiv_id}")
    print(f"PDF URL: {test_pdf_url}")

    # Check API key
    api_key = os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        print("❌ Error: ANTHROPIC_API_KEY environment variable not found")
        return False
    print(f"✅ API key is set: {api_key[:20]}...")

    try:
        # Check if paper exists in database
        paper = await db.get_paper(test_arxiv_id)
        if paper:
            print(f"✅ Paper found in database: {paper['title']}")
        else:
            print("⚠️ Paper not in database, creating new record")
            # Insert test paper
            await db.insert_paper(
                arxiv_id=test_arxiv_id,
                title="Test Paper for Evaluation",
                authors="Test Author",
                abstract="This is a test paper for evaluation.",
                categories="cs.AI",
                published_date="2024-08-01"
            )
            print("✅ Test paper inserted into database")

        print("\n=== Starting Evaluation ===")
        # Run evaluation
        result = await run_evaluation(
            pdf_path=test_pdf_url,
            arxiv_id=test_arxiv_id,
            api_key=api_key
        )

        print("\n=== Evaluation Results ===")
        print(f"Result length: {len(result)} characters")
        print(f"First 500 characters: {result[:500]}...")

        # Check if result contains expected content
        if "AI Automation Assessment" in result or "Executive Summary" in result:
            print("✅ Evaluation result contains expected content")
        else:
            print("⚠️ Evaluation result may be incomplete")

        # Check evaluation status in database
        updated_paper = await db.get_paper(test_arxiv_id)
        if updated_paper and updated_paper.get('is_evaluated'):
            print("✅ Evaluation saved to database")
            print(f"Evaluation score: {updated_paper.get('evaluation_score')}")
            print(f"Evaluation tags: {updated_paper.get('evaluation_tags')}")
        else:
            print("❌ Evaluation not saved to database")

        return True

    except Exception as e:
        print(f"❌ Error during evaluation: {str(e)}")
        import traceback
        traceback.print_exc()
        return False
async def test_database_operations():
    """Test database operations"""
    print("\n=== Testing Database Operations ===")

    try:
        # Test getting paper
        paper = await db.get_paper("2508.09889")
        if paper:
            print(f"✅ Database connection OK, found paper: {paper['title']}")
        else:
            print("⚠️ Test paper not found in database")

        # Test getting paper statistics
        stats = await db.get_papers_count()
        print(f"✅ Paper statistics: Total={stats['total']}, Evaluated={stats['evaluated']}, Unevaluated={stats['unevaluated']}")

        return True

    except Exception as e:
        print(f"❌ Database operation error: {str(e)}")
        return False
async def main():
    """Main test function"""
    print("🚀 Starting Evaluation System Test")

    # Parse command line arguments
    args = parse_args()

    # Initialize configuration
    config.init_config(args.config, args)

    # Initialize logger
    logger.init_logger(config=config)
    logger.info(f"| Logger initialized at: {config.log_path}")
    logger.info(f"| Config:\n{config.pretty_text}")

    # Initialize database
    await db.init_db(config=config)
    logger.info(f"| Database initialized at: {config.db_path}")
    print(f"✅ Database initialized: {config.db_path}")

    # Test database operations
    db_success = await test_database_operations()

    # Test evaluation functionality
    eval_success = await test_evaluation()

    print("\n=== Test Summary ===")
    print(f"Database operations: {'✅ Success' if db_success else '❌ Failed'}")
    print(f"Evaluation functionality: {'✅ Success' if eval_success else '❌ Failed'}")

    if db_success and eval_success:
        print("🎉 All tests passed!")
    else:
        print("⚠️ Some tests failed, please check error messages")


if __name__ == "__main__":
    asyncio.run(main())