Spaces:
Sleeping
Sleeping
File size: 3,048 Bytes
f011b22 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
"""
Test script to debug metadata loading and file finding.
"""
import os
import json
import sys
# Add the parent directory to sys.path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from agent.utils.question_analyzer import QuestionAnalyzer
# Status markers, written as escapes so a wrongly decoded source file cannot
# garble them again: U+2705 WHITE HEAVY CHECK MARK and U+274C CROSS MARK.
_OK_MARK = "\u2705"
_FAIL_MARK = "\u274c"


def _preview_metadata(metadata, resource_dir, limit):
    """Print the first *limit* metadata entries and check their attached files.

    Args:
        metadata: Mapping of task id to an entry dict; the keys 'Question',
            'file_name' and 'Final answer' are read from each entry.
        resource_dir: Directory that attached files are resolved against.
        limit: Maximum number of entries to print.
    """
    for index, (task_id, entry) in enumerate(metadata.items()):
        if index >= limit:
            break
        # `or` also covers an explicit None value, which cannot be sliced.
        question = entry.get('Question') or 'N/A'
        print(f"\nTask ID: {task_id}")
        print(f"Question: {question[:100]}...")
        print(f"File Name: {entry.get('file_name', 'N/A')}")
        print(f"Expected Answer: {entry.get('Final answer', 'N/A')}")

        # Entries without an attached file have nothing to look up on disk.
        if entry.get('file_name'):
            file_path = os.path.join(resource_dir, entry['file_name'])
            if os.path.exists(file_path):
                print(f"{_OK_MARK} File exists: {file_path}")
            else:
                print(f"{_FAIL_MARK} File does not exist: {file_path}")


def _load_test_questions(metadata_path, limit):
    """Read up to *limit* records that have a question and a non-empty file name.

    Args:
        metadata_path: Path to a JSON Lines file (one JSON object per line).
        limit: Maximum number of records to return.

    Returns:
        A list of dicts with the keys 'task_id', 'question' and 'file_name'.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    questions = []
    with open(metadata_path, 'r', encoding='utf-8') as f:
        for line in f:
            if len(questions) >= limit:
                break
            line = line.strip()
            if not line:
                # Blank lines (e.g. an extra newline at the end of the file)
                # are not records; json.loads('') would raise on them.
                continue
            entry = json.loads(line)
            if 'Question' in entry and entry.get('file_name'):
                questions.append({
                    'task_id': entry.get('task_id'),
                    'question': entry['Question'],
                    'file_name': entry['file_name'],
                })
    return questions


def main(resource_dir=None, max_entries=5):
    """Print a diagnostic report on metadata loading and file finding.

    The report lists the resource directory, previews the metadata loaded by
    QuestionAnalyzer, and then calls ``find_relevant_file`` for a few
    questions read directly from ``metadata.jsonl``.

    Args:
        resource_dir: Directory holding ``metadata.jsonl`` and the attached
            files. Defaults to ``<parent of this file's directory>/resource``.
        max_entries: Maximum number of metadata entries to preview and of
            questions to test. Defaults to 5.

    Returns:
        None. Returns early, after printing a message, when the resource
        directory or the metadata file is missing.
    """
    if resource_dir is None:
        project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        resource_dir = os.path.join(project_root, 'resource')
    print(f"Resource directory: {resource_dir}")

    # isdir (not exists) so that a plain file at this path is reported here
    # instead of making os.listdir raise below.
    if not os.path.isdir(resource_dir):
        print(f"Resource directory does not exist: {resource_dir}")
        return

    print("Files in resource directory:")
    for name in os.listdir(resource_dir):
        print(f" {name}")

    metadata_path = os.path.join(resource_dir, 'metadata.jsonl')
    if not os.path.isfile(metadata_path):
        print(f"Metadata file does not exist: {metadata_path}")
        return

    print("\nLoading metadata...")
    question_analyzer = QuestionAnalyzer(resource_dir)
    print(f"Metadata entries: {len(question_analyzer.metadata)}")
    _preview_metadata(question_analyzer.metadata, resource_dir, max_entries)

    print("\nTesting file finding...")
    for q in _load_test_questions(metadata_path, max_entries):
        print(f"\nQuestion: {q['question'][:100]}...")
        print(f"Expected file: {q['file_name']}")

        file_path = question_analyzer.find_relevant_file(q['question'], q['task_id'])
        if file_path:
            print(f"{_OK_MARK} Found file: {os.path.basename(file_path)}")
        else:
            print(f"{_FAIL_MARK} No file found")
# Run the diagnostic report only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()
|