File size: 3,048 Bytes
f011b22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
"""
Test script to debug metadata loading and file finding.
"""
import os
import json
import sys

# Put the directory one level above this script's folder on the module search
# path (presumably so the project-local import that follows can be resolved).
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
)

from agent.utils.question_analyzer import QuestionAnalyzer

def _preview_metadata(question_analyzer, resource_dir, limit=5):
    """Print up to ``limit`` metadata entries and check their attachments.

    Args:
        question_analyzer: Object whose ``metadata`` attribute maps a task ID
            to an entry dict (it is iterated with ``.items()``).
        resource_dir: Directory that each entry's ``file_name`` is resolved
            against.
        limit: Maximum number of entries to print.
    """
    for shown, (task_id, entry) in enumerate(question_analyzer.metadata.items()):
        if shown >= limit:
            break

        # A missing or null question must not crash the preview slice.
        question = str(entry.get('Question') or 'N/A')
        print(f"\nTask ID: {task_id}")
        print(f"Question: {question[:100]}...")
        print(f"File Name: {entry.get('file_name', 'N/A')}")
        print(f"Expected Answer: {entry.get('Final answer', 'N/A')}")

        # Only entries that reference an attachment are checked on disk.
        file_name = entry.get('file_name')
        if not file_name:
            continue
        file_path = os.path.join(resource_dir, file_name)
        if os.path.exists(file_path):
            print(f"✅ File exists: {file_path}")
        else:
            print(f"❌ File does not exist: {file_path}")


def _load_test_questions(metadata_path, limit=5):
    """Read up to ``limit`` questions that reference a file from a JSONL file.

    Blank lines are ignored and lines that are not valid JSON objects are
    reported and skipped, so one bad record does not abort the whole run.

    Args:
        metadata_path: Path to the JSON Lines metadata file.
        limit: Maximum number of questions to collect.

    Returns:
        A list of dicts with the keys ``task_id``, ``question`` and
        ``file_name``.
    """
    questions = []
    with open(metadata_path, 'r', encoding='utf-8') as f:
        for line_no, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line:
                continue
            try:
                entry = json.loads(line)
            except json.JSONDecodeError as exc:
                print(f"Skipping malformed metadata line {line_no}: {exc}")
                continue
            if not isinstance(entry, dict):
                print(f"Skipping non-object metadata line {line_no}")
                continue

            # Keep only entries that carry a question and a non-empty file name.
            if 'Question' in entry and entry.get('file_name'):
                questions.append({
                    'task_id': entry.get('task_id'),
                    'question': entry['Question'],
                    'file_name': entry['file_name']
                })
                if len(questions) >= limit:
                    break
    return questions


def main():
    """Print diagnostics for metadata loading and file finding.

    Locates the ``resource`` directory one level above this script's folder,
    lists its contents, loads ``metadata.jsonl`` through ``QuestionAnalyzer``,
    previews the first few entries, and then calls ``find_relevant_file`` for
    the first few questions that reference a file. Returns early, after
    printing a message, when the directory or the metadata file is missing.
    """
    # Get the resource directory
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    resource_dir = os.path.join(project_root, 'resource')
    print(f"Resource directory: {resource_dir}")

    # isdir (not exists) so a stray regular file named "resource" is rejected
    # here instead of making os.listdir raise below.
    if not os.path.isdir(resource_dir):
        print(f"Resource directory does not exist: {resource_dir}")
        return

    # List files in the directory
    print("Files in resource directory:")
    for name in os.listdir(resource_dir):
        print(f"  {name}")

    # Check for metadata.jsonl
    metadata_path = os.path.join(resource_dir, 'metadata.jsonl')
    if not os.path.isfile(metadata_path):
        print(f"Metadata file does not exist: {metadata_path}")
        return

    # Load metadata
    print("\nLoading metadata...")
    question_analyzer = QuestionAnalyzer(resource_dir)
    print(f"Metadata entries: {len(question_analyzer.metadata)}")

    # Print first few entries
    _preview_metadata(question_analyzer, resource_dir)

    # Test file finding
    print("\nTesting file finding...")
    for q in _load_test_questions(metadata_path):
        print(f"\nQuestion: {str(q['question'])[:100]}...")
        print(f"Expected file: {q['file_name']}")

        file_path = question_analyzer.find_relevant_file(q['question'], q['task_id'])
        if file_path:
            print(f"✅ Found file: {os.path.basename(file_path)}")
        else:
            print("❌ No file found")

# Run the diagnostics only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()