#!/usr/bin/env python3
"""
Test script: Verify that the run_evaluation function works correctly
"""
import asyncio
import os
import sys
from pathlib import Path
from dotenv import load_dotenv
import argparse
from mmengine import DictAction
# Load environment variables
load_dotenv(verbose=True)

# Set the project root path
root = str(Path(__file__).resolve().parents[1])
sys.path.append(root)
from src.database import db
from src.logger import logger
from src.config import config
from src.agents.evaluator import run_evaluation
def parse_args():
    """Parse command line arguments"""
    parser = argparse.ArgumentParser(description='main')
    parser.add_argument("--config", default=os.path.join(root, "configs", "paper_agent.py"), help="config file path")
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        'Note that the quotation marks are necessary and that no white space '
        'is allowed.')
    args = parser.parse_args()
    return args
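# Example of the --cfg-options format accepted by mmengine's DictAction (the keys
# shown are hypothetical, purely to illustrate the xxx=yyy syntax):
#   python test_evaluation.py --cfg-options log_level=DEBUG tags="[ai,agents]"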
async def test_evaluation():
    """Test evaluation functionality"""
    print("=== Starting Evaluation Test ===")

    # Test parameters
    test_arxiv_id = "2508.09889"  # Use existing paper in database
    test_pdf_url = f"https://arxiv.org/pdf/{test_arxiv_id}.pdf"
    print(f"Test paper ID: {test_arxiv_id}")
    print(f"PDF URL: {test_pdf_url}")

    # Check API key
    api_key = os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        print("❌ Error: ANTHROPIC_API_KEY environment variable not found")
        return False
    print(f"✅ API key is set: {api_key[:20]}...")

    try:
        # Check if paper exists in database
        paper = await db.get_paper(test_arxiv_id)
        if paper:
            print(f"✅ Paper found in database: {paper['title']}")
        else:
            print("⚠️ Paper not in database, creating new record")
            # Insert test paper
            await db.insert_paper(
                arxiv_id=test_arxiv_id,
                title="Test Paper for Evaluation",
                authors="Test Author",
                abstract="This is a test paper for evaluation.",
                categories="cs.AI",
                published_date="2024-08-01"
            )
            print("✅ Test paper inserted into database")

        print("\n=== Starting Evaluation ===")
        # Run evaluation
        result = await run_evaluation(
            pdf_path=test_pdf_url,
            arxiv_id=test_arxiv_id,
            api_key=api_key
        )

        print("\n=== Evaluation Results ===")
        print(f"Result length: {len(result)} characters")
        print(f"First 500 characters: {result[:500]}...")

        # Check if result contains expected content
        if "AI Automation Assessment" in result or "Executive Summary" in result:
            print("✅ Evaluation result contains expected content")
        else:
            print("⚠️ Evaluation result may be incomplete")

        # Check evaluation status in database
        updated_paper = await db.get_paper(test_arxiv_id)
        if updated_paper and updated_paper.get('is_evaluated'):
            print("✅ Evaluation saved to database")
            print(f"Evaluation score: {updated_paper.get('evaluation_score')}")
            print(f"Evaluation tags: {updated_paper.get('evaluation_tags')}")
        else:
            print("❌ Evaluation not saved to database")

        return True

    except Exception as e:
        print(f"❌ Error during evaluation: {str(e)}")
        import traceback
        traceback.print_exc()
        return False
async def test_database_operations():
    """Test database operations"""
    print("\n=== Testing Database Operations ===")

    try:
        # Test getting paper
        paper = await db.get_paper("2508.09889")
        if paper:
            print(f"✅ Database connection OK, found paper: {paper['title']}")
        else:
            print("⚠️ Test paper not found in database")

        # Test getting paper statistics
        stats = await db.get_papers_count()
        print(f"✅ Paper statistics: Total={stats['total']}, Evaluated={stats['evaluated']}, Unevaluated={stats['unevaluated']}")

        return True

    except Exception as e:
        print(f"❌ Database operation error: {str(e)}")
        return False
async def main():
    """Main test function"""
    print("🚀 Starting Evaluation System Test")

    # Parse command line arguments
    args = parse_args()

    # Initialize configuration
    config.init_config(args.config, args)

    # Initialize logger
    logger.init_logger(config=config)
    logger.info(f"| Logger initialized at: {config.log_path}")
    logger.info(f"| Config:\n{config.pretty_text}")

    # Initialize database
    await db.init_db(config=config)
    logger.info(f"| Database initialized at: {config.db_path}")
    print(f"✅ Database initialized: {config.db_path}")

    # Test database operations
    db_success = await test_database_operations()

    # Test evaluation functionality
    eval_success = await test_evaluation()

    print("\n=== Test Summary ===")
    print(f"Database operations: {'✅ Success' if db_success else '❌ Failed'}")
    print(f"Evaluation functionality: {'✅ Success' if eval_success else '❌ Failed'}")

    if db_success and eval_success:
        print("🎉 All tests passed!")
    else:
        print("⚠️ Some tests failed, please check error messages")


if __name__ == "__main__":
    asyncio.run(main())