#!/usr/bin/env python3
"""
Test script: Verify that the run_evaluation function works correctly
"""
import asyncio
import os
import sys
from pathlib import Path
from dotenv import load_dotenv
import argparse
from mmengine import DictAction

# Load environment variables
load_dotenv(verbose=True)

# Set the root directory path
root = str(Path(__file__).resolve().parents[1])
sys.path.append(root)

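# Project-local imports (these resolve because the repo root was added to sys.path above)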
from src.database import db
from src.logger import logger
from src.config import config
from src.agents.evaluator import run_evaluation

def parse_args():
    """Parse command line arguments"""
    parser = argparse.ArgumentParser(description='main')
    parser.add_argument("--config", default=os.path.join(root, "configs", "paper_agent.py"), help="config file path")
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        'Note that the quotation marks are necessary and that no white space '
        'is allowed.')
    args = parser.parse_args()
    return args
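
# Example --cfg-options override (the value is illustrative; any key defined in
# configs/paper_agent.py, e.g. db_path, can be overridden the same way):
#   python this_script.py --cfg-options db_path=./test_papers.db
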
async def test_evaluation():
    """Test evaluation functionality"""
    print("=== Starting Evaluation Test ===")

    # Test parameters
    test_arxiv_id = "2508.09889"  # Use existing paper in database
    test_pdf_url = f"https://arxiv.org/pdf/{test_arxiv_id}.pdf"
    print(f"Test paper ID: {test_arxiv_id}")
    print(f"PDF URL: {test_pdf_url}")

    # Check API key
    api_key = os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        print("❌ Error: ANTHROPIC_API_KEY environment variable not found")
        return False
    print(f"✅ API key is set: {api_key[:20]}...")
    try:
        # Check if paper exists in database
        paper = await db.get_paper(test_arxiv_id)
        if paper:
            print(f"✅ Paper found in database: {paper['title']}")
        else:
            print("⚠️ Paper not in database, creating new record")
            # Insert test paper
            await db.insert_paper(
                arxiv_id=test_arxiv_id,
                title="Test Paper for Evaluation",
                authors="Test Author",
                abstract="This is a test paper for evaluation.",
                categories="cs.AI",
                published_date="2024-08-01"
            )
            print("✅ Test paper inserted into database")
print("\n=== Starting Evaluation ===") | |
# Run evaluation | |
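        # Note: the arXiv PDF URL is passed as pdf_path; run_evaluation is
        # assumed to handle remote URLs as well as local file paths.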
        result = await run_evaluation(
            pdf_path=test_pdf_url,
            arxiv_id=test_arxiv_id,
            api_key=api_key
        )
        print("\n=== Evaluation Results ===")
        print(f"Result length: {len(result)} characters")
        print(f"First 500 characters: {result[:500]}...")

        # Check if result contains expected content
        if "AI Automation Assessment" in result or "Executive Summary" in result:
            print("✅ Evaluation result contains expected content")
        else:
            print("⚠️ Evaluation result may be incomplete")

        # Check evaluation status in database
        updated_paper = await db.get_paper(test_arxiv_id)
        if updated_paper and updated_paper.get('is_evaluated'):
            print("✅ Evaluation saved to database")
            print(f"Evaluation score: {updated_paper.get('evaluation_score')}")
            print(f"Evaluation tags: {updated_paper.get('evaluation_tags')}")
        else:
            print("❌ Evaluation not saved to database")
        return True
    except Exception as e:
        print(f"❌ Error during evaluation: {str(e)}")
        import traceback
        traceback.print_exc()
        return False

async def test_database_operations():
    """Test database operations"""
    print("\n=== Testing Database Operations ===")
    try:
        # Test getting paper
        paper = await db.get_paper("2508.09889")
        if paper:
            print(f"✅ Database connection OK, found paper: {paper['title']}")
        else:
            print("⚠️ Test paper not found in database")

        # Test getting paper statistics
        stats = await db.get_papers_count()
        print(f"✅ Paper statistics: Total={stats['total']}, Evaluated={stats['evaluated']}, Unevaluated={stats['unevaluated']}")
        return True
    except Exception as e:
        print(f"❌ Database operation error: {str(e)}")
        return False

async def main():
    """Main test function"""
    print("🚀 Starting Evaluation System Test")

    # Parse command line arguments
    args = parse_args()

    # Initialize configuration
    config.init_config(args.config, args)

    # Initialize logger
    logger.init_logger(config=config)
    logger.info(f"| Logger initialized at: {config.log_path}")
    logger.info(f"| Config:\n{config.pretty_text}")

    # Initialize database
    await db.init_db(config=config)
    logger.info(f"| Database initialized at: {config.db_path}")
    print(f"✅ Database initialized: {config.db_path}")

    # Test database operations
    db_success = await test_database_operations()

    # Test evaluation functionality
    eval_success = await test_evaluation()

    print("\n=== Test Summary ===")
    print(f"Database operations: {'✅ Success' if db_success else '❌ Failed'}")
    print(f"Evaluation functionality: {'✅ Success' if eval_success else '❌ Failed'}")
    if db_success and eval_success:
        print("🎉 All tests passed!")
    else:
        print("⚠️ Some tests failed, please check error messages")


if __name__ == "__main__":
    asyncio.run(main())