#!/usr/bin/env python3
"""
Test script: Verify that the run_evaluation function works correctly
"""
import asyncio
import os
import sys
from pathlib import Path
from dotenv import load_dotenv
import argparse
from mmengine import DictAction

# Load environment variables (e.g. ANTHROPIC_API_KEY) from a local .env file
load_dotenv(verbose=True)

# Put the project root (one level above test/) on sys.path so the src.* imports resolve
root = str(Path(__file__).resolve().parents[1])
sys.path.append(root)

from src.database import db
from src.logger import logger
from src.config import config
from src.agents.evaluator import run_evaluation
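
# db, logger, and config are module-level objects from this repo's src package;
# they are configured in main() below before any test runs against them.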


def parse_args():
    """Parse command line arguments"""
    parser = argparse.ArgumentParser(description='Evaluation system test')
    parser.add_argument("--config", default=os.path.join(root, "configs", "paper_agent.py"),
                        help="config file path")
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help='override some settings in the used config; key-value pairs '
             'in xxx=yyy format will be merged into the config file. If the value to '
             'be overwritten is a list, it should be like key="[a,b]" or key=a,b. '
             'Nested list/tuple values are also allowed, e.g. key="[(a,b),(c,d)]". '
             'Note that the quotation marks are necessary and that no whitespace '
             'is allowed.')
    args = parser.parse_args()
    return args
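

# Example --cfg-options override (hypothetical keys, shown only to illustrate
# the xxx=yyy format described in the help text above):
#   python test/test_evaluation.py --cfg-options log_level=DEBUG categories="[cs.AI,cs.CL]"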


async def test_evaluation():
    """Test evaluation functionality"""
    print("=== Starting Evaluation Test ===")

    # Test parameters
    test_arxiv_id = "2508.09889"  # Use existing paper in database
    test_pdf_url = f"https://arxiv.org/pdf/{test_arxiv_id}.pdf"
    print(f"Test paper ID: {test_arxiv_id}")
    print(f"PDF URL: {test_pdf_url}")

    # Check API key
    api_key = os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        print("❌ Error: ANTHROPIC_API_KEY environment variable not found")
        return False
    print(f"✅ API key is set: {api_key[:20]}...")

    try:
        # Check if paper exists in database
        paper = await db.get_paper(test_arxiv_id)
        if paper:
            print(f"✅ Paper found in database: {paper['title']}")
        else:
            print("⚠️ Paper not in database, creating new record")
            # Insert test paper
            await db.insert_paper(
                arxiv_id=test_arxiv_id,
                title="Test Paper for Evaluation",
                authors="Test Author",
                abstract="This is a test paper for evaluation.",
                categories="cs.AI",
                published_date="2024-08-01"
            )
            print("✅ Test paper inserted into database")

        print("\n=== Starting Evaluation ===")
        # Run evaluation
        result = await run_evaluation(
            pdf_path=test_pdf_url,
            arxiv_id=test_arxiv_id,
            api_key=api_key
        )

        print("\n=== Evaluation Results ===")
        print(f"Result length: {len(result)} characters")
        print(f"First 500 characters: {result[:500]}...")

        # Check if result contains expected content
        if "AI Automation Assessment" in result or "Executive Summary" in result:
            print("✅ Evaluation result contains expected content")
        else:
            print("⚠️ Evaluation result may be incomplete")

        # Check evaluation status in database
        updated_paper = await db.get_paper(test_arxiv_id)
        if updated_paper and updated_paper.get('is_evaluated'):
            print("✅ Evaluation saved to database")
            print(f"Evaluation score: {updated_paper.get('evaluation_score')}")
            print(f"Evaluation tags: {updated_paper.get('evaluation_tags')}")
        else:
            print("❌ Evaluation not saved to database")

        return True
    except Exception as e:
        print(f"❌ Error during evaluation: {e}")
        import traceback
        traceback.print_exc()
        return False


async def test_database_operations():
    """Test database operations"""
    print("\n=== Testing Database Operations ===")
    try:
        # Test getting paper
        paper = await db.get_paper("2508.09889")
        if paper:
            print(f"✅ Database connection OK, found paper: {paper['title']}")
        else:
            print("⚠️ Test paper not found in database")

        # Test getting paper statistics
        stats = await db.get_papers_count()
        print(f"✅ Paper statistics: Total={stats['total']}, Evaluated={stats['evaluated']}, Unevaluated={stats['unevaluated']}")

        return True
    except Exception as e:
        print(f"❌ Database operation error: {e}")
        return False


async def main():
    """Main test function"""
    print("🚀 Starting Evaluation System Test")

    # Parse command line arguments
    args = parse_args()

    # Initialize configuration
    config.init_config(args.config, args)

    # Initialize logger
    logger.init_logger(config=config)
    logger.info(f"| Logger initialized at: {config.log_path}")
    logger.info(f"| Config:\n{config.pretty_text}")

    # Initialize database
    await db.init_db(config=config)
    logger.info(f"| Database initialized at: {config.db_path}")
    print(f"✅ Database initialized: {config.db_path}")

    # Test database operations
    db_success = await test_database_operations()

    # Test evaluation functionality
    eval_success = await test_evaluation()

    print("\n=== Test Summary ===")
    print(f"Database operations: {'✅ Success' if db_success else '❌ Failed'}")
    print(f"Evaluation functionality: {'✅ Success' if eval_success else '❌ Failed'}")

    if db_success and eval_success:
        print("🎉 All tests passed!")
    else:
        print("⚠️ Some tests failed, please check the error messages")


if __name__ == "__main__":
    asyncio.run(main())