"""
Test script to debug metadata loading and file finding.
"""

import json
import os
import sys
from itertools import islice

# Add the parent directory to sys.path so the `agent` package resolves when
# this script is run directly from its own directory.
_PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(_PROJECT_ROOT)

from agent.utils.question_analyzer import QuestionAnalyzer  # noqa: E402

# Maximum number of metadata entries / test questions to print.
_SAMPLE_LIMIT = 5


def _print_metadata_preview(question_analyzer, resource_dir):
    """Print the first few metadata entries and whether their files exist.

    Args:
        question_analyzer: Object exposing a ``metadata`` attribute that is
            iterated with ``.items()`` (task id -> entry dict, as used here).
        resource_dir: Directory against which each entry's ``file_name`` is
            resolved.
    """
    for task_id, entry in islice(question_analyzer.metadata.items(), _SAMPLE_LIMIT):
        print(f"\nTask ID: {task_id}")
        print(f"Question: {entry.get('Question', 'N/A')[:100]}...")
        print(f"File Name: {entry.get('file_name', 'N/A')}")
        print(f"Expected Answer: {entry.get('Final answer', 'N/A')}")

        # Only entries that name a file can be checked on disk.
        if entry.get('file_name'):
            file_path = os.path.join(resource_dir, entry['file_name'])
            if os.path.exists(file_path):
                print(f"✅ File exists: {file_path}")
            else:
                print(f"❌ File does not exist: {file_path}")


def _load_test_questions(metadata_path):
    """Collect up to ``_SAMPLE_LIMIT`` entries that have a question and a file.

    Args:
        metadata_path: Path to a JSON Lines file, one JSON object per line.

    Returns:
        A list of dicts with the keys ``task_id``, ``question`` and
        ``file_name``.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    test_questions = []
    with open(metadata_path, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                # Blank lines are not JSON documents; json.loads('') would
                # raise and abort the whole run.
                continue
            entry = json.loads(line)
            if 'Question' in entry and 'file_name' in entry and entry['file_name']:
                test_questions.append({
                    'task_id': entry.get('task_id'),
                    'question': entry['Question'],
                    'file_name': entry['file_name'],
                })
                if len(test_questions) >= _SAMPLE_LIMIT:
                    break
    return test_questions


def _report_file_finding(question_analyzer, test_questions):
    """Run ``find_relevant_file`` for each sample question and print the result."""
    for q in test_questions:
        print(f"\nQuestion: {q['question'][:100]}...")
        print(f"Expected file: {q['file_name']}")

        file_path = question_analyzer.find_relevant_file(q['question'], q['task_id'])
        if file_path:
            print(f"✅ Found file: {os.path.basename(file_path)}")
        else:
            print("❌ No file found")


def main():
    """Main function to test metadata loading and file finding.

    Prints the resource directory contents, a preview of the metadata loaded
    by ``QuestionAnalyzer``, and the outcome of ``find_relevant_file`` for a
    few questions read from ``metadata.jsonl``. Returns early (after printing
    a message) when the resource directory or the metadata file is missing.
    """
    # Get the resource directory
    resource_dir = os.path.join(_PROJECT_ROOT, 'resource')
    print(f"Resource directory: {resource_dir}")

    # isdir rather than exists: a regular file at this path would pass an
    # existence check and then make os.listdir raise.
    if not os.path.isdir(resource_dir):
        print(f"Resource directory does not exist: {resource_dir}")
        return

    # List files in the directory
    print("Files in resource directory:")
    for file in os.listdir(resource_dir):
        print(f" {file}")

    # Check for metadata.jsonl (must be a regular file so open() can read it)
    metadata_path = os.path.join(resource_dir, 'metadata.jsonl')
    if not os.path.isfile(metadata_path):
        print(f"Metadata file does not exist: {metadata_path}")
        return

    # Load metadata
    print("\nLoading metadata...")
    question_analyzer = QuestionAnalyzer(resource_dir)

    # Print metadata entries
    print(f"Metadata entries: {len(question_analyzer.metadata)}")

    # Print first few entries
    _print_metadata_preview(question_analyzer, resource_dir)

    # Test file finding
    print("\nTesting file finding...")
    test_questions = _load_test_questions(metadata_path)
    _report_file_finding(question_analyzer, test_questions)


if __name__ == "__main__":
    main()