#!/usr/bin/env python3
"""
Test script: Verify that the run_evaluation function works correctly
"""

import asyncio
import os
import sys
from pathlib import Path
from dotenv import load_dotenv
import argparse
from mmengine import DictAction

# Load environment variables
load_dotenv(verbose=True)

# Set the project root directory path
root = str(Path(__file__).resolve().parents[1])
sys.path.append(root)

from src.database import db
from src.logger import logger
from src.config import config
from src.agents.evaluator import run_evaluation


def parse_args():
    """Parse command line arguments"""
    parser = argparse.ArgumentParser(description='main')
    parser.add_argument("--config", default=os.path.join(root, "configs", "paper_agent.py"), help="config file path")

    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        'Note that the quotation marks are necessary and that no white space '
        'is allowed.')
    args = parser.parse_args()
    return args
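
# Example invocation (an illustrative sketch only; the script name and the
# --cfg-options keys below are hypothetical placeholders, not values verified
# against configs/paper_agent.py):
#   python test_run_evaluation.py --config configs/paper_agent.py \
#       --cfg-options log_level=DEBUG db_path=data/test.db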


async def test_evaluation():
    """Test evaluation functionality"""
    print("=== Starting Evaluation Test ===")
    
    # Test parameters
    test_arxiv_id = "2508.09889"  # Use existing paper in database
    test_pdf_url = f"https://arxiv.org/pdf/{test_arxiv_id}.pdf"
    
    print(f"Test paper ID: {test_arxiv_id}")
    print(f"PDF URL: {test_pdf_url}")
    
    # Check API key
    api_key = os.getenv("ANTHROPIC_API_KEY")
    if not api_key:
        print("โŒ Error: ANTHROPIC_API_KEY environment variable not found")
        return False
    
    print(f"โœ… API key is set: {api_key[:20]}...")
    
    try:
        # Check if paper exists in database
        paper = await db.get_paper(test_arxiv_id)
        if paper:
            print(f"โœ… Paper found in database: {paper['title']}")
        else:
            print(f"โš ๏ธ  Paper not in database, creating new record")
            # Insert test paper
            await db.insert_paper(
                arxiv_id=test_arxiv_id,
                title="Test Paper for Evaluation",
                authors="Test Author",
                abstract="This is a test paper for evaluation.",
                categories="cs.AI",
                published_date="2024-08-01"
            )
            print(f"โœ… Test paper inserted into database")
        
        print("\n=== Starting Evaluation ===")
        
        # Run evaluation
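        # Note: a remote arXiv PDF URL is passed as pdf_path here;
        # run_evaluation is assumed to accept URLs as well as local file paths.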
        result = await run_evaluation(
            pdf_path=test_pdf_url,
            arxiv_id=test_arxiv_id,
            api_key=api_key
        )
        
        print(f"\n=== Evaluation Results ===")
        print(f"Result length: {len(result)} characters")
        print(f"First 500 characters: {result[:500]}...")
        
        # Check if result contains expected content
        if "AI Automation Assessment" in result or "Executive Summary" in result:
            print("โœ… Evaluation result contains expected content")
        else:
            print("โš ๏ธ  Evaluation result may be incomplete")
        
        # Check evaluation status in database
        updated_paper = await db.get_paper(test_arxiv_id)
        if updated_paper and updated_paper.get('is_evaluated'):
            print("โœ… Evaluation saved to database")
            print(f"Evaluation score: {updated_paper.get('evaluation_score')}")
            print(f"Evaluation tags: {updated_paper.get('evaluation_tags')}")
        else:
            print("โŒ Evaluation not saved to database")
        
        return True
        
    except Exception as e:
        print(f"โŒ Error during evaluation: {str(e)}")
        import traceback
        traceback.print_exc()
        return False


async def test_database_operations():
    """Test database operations"""
    print("\n=== Testing Database Operations ===")
    
    try:
        # Test getting paper
        paper = await db.get_paper("2508.09889")
        if paper:
            print(f"โœ… Database connection OK, found paper: {paper['title']}")
        else:
            print("โš ๏ธ  Test paper not found in database")
        
        # Test getting paper statistics
        stats = await db.get_papers_count()
        print(f"โœ… Paper statistics: Total={stats['total']}, Evaluated={stats['evaluated']}, Unevaluated={stats['unevaluated']}")
        
        return True
        
    except Exception as e:
        print(f"โŒ Database operation error: {str(e)}")
        return False


async def main():
    """Main test function"""
    print("๐Ÿš€ Starting Evaluation System Test")
    
    # Parse command line arguments
    args = parse_args()

    # Initialize configuration
    config.init_config(args.config, args)

    # Initialize logger
    logger.init_logger(config=config)
    logger.info(f"| Logger initialized at: {config.log_path}")
    logger.info(f"| Config:\n{config.pretty_text}")

    # Initialize database
    await db.init_db(config=config)
    logger.info(f"| Database initialized at: {config.db_path}")
    
    print(f"โœ… Database initialized: {config.db_path}")
    
    # Test database operations
    db_success = await test_database_operations()
    
    # Test evaluation functionality
    eval_success = await test_evaluation()
    
    print("\n=== Test Summary ===")
    print(f"Database operations: {'โœ… Success' if db_success else 'โŒ Failed'}")
    print(f"Evaluation functionality: {'โœ… Success' if eval_success else 'โŒ Failed'}")
    
    if db_success and eval_success:
        print("๐ŸŽ‰ All tests passed!")
    else:
        print("โš ๏ธ  Some tests failed, please check error messages")


if __name__ == "__main__":
    asyncio.run(main())