
Enhance final answer processing in FinalAnswerTool to extract concise results based on "FINAL ANSWER:" prefix, improving clarity and consistency in output formatting.
028b4c8
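The change described above presumably reduces a verbose model response to just the text following the "FINAL ANSWER:" marker. As a rough, hypothetical sketch (not the actual FinalAnswerTool code; the helper name and the exact prefix handling are assumptions), the extraction might look like this:

# Hypothetical sketch of the "FINAL ANSWER:" extraction described in the commit
# message; not the actual FinalAnswerTool implementation.
def extract_final_answer(raw_output: str, prefix: str = "FINAL ANSWER:") -> str:
    # Keep only the text after the last occurrence of the prefix, if present.
    if prefix in raw_output:
        return raw_output.rsplit(prefix, 1)[-1].strip()
    return raw_output.strip()
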
import os
import json
import random
import requests
import yaml
import pprint
from dotenv import load_dotenv
from smolagents import CodeAgent, HfApiModel
from tools.final_answer import FinalAnswerTool
from tools.visit_webpage import VisitWebpageTool
from tools.web_search import DuckDuckGoSearchTool  # Note: app.py imports this from tools.web_search and smolagents

# Load environment variables from .env file
load_dotenv()
hf_token = os.getenv('HUGGINGFACE_TOKEN')
if not hf_token:
    raise ValueError("HUGGINGFACE_TOKEN not found in environment variables. Make sure a .env file exists.")

# --- Constants ---
API_URL = os.getenv("API_URL", "https://agents-course-unit4-scoring.hf.space")  # Use env var or default
QUESTIONS_URL = f"{API_URL}/questions"
QUESTIONS_FILE = "questions.json"
ANSWERS_LOG_FILE = "answer_log.jsonl"
PROMPTS_FILE = "prompts.yaml"

# --- Function to Fetch Questions ---
def fetch_and_save_questions(url: str, filename: str):
    """Fetches questions from the API and saves them to a local JSON file."""
    if os.path.exists(filename):
        print(f"Questions file '{filename}' already exists. Skipping download.")
        return True
    print(f"Fetching questions from: {url}")
    try:
        response = requests.get(url, timeout=30)  # Increased timeout
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return False
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(questions_data, f, indent=4, ensure_ascii=False)
        print(f"Successfully fetched {len(questions_data)} questions and saved to '{filename}'.")
        return True
    except requests.exceptions.JSONDecodeError as e:
        # Handled before RequestException: in recent requests versions JSONDecodeError
        # is a subclass of RequestException, so the broader handler would otherwise shadow it.
        print(f"Error decoding JSON response from questions endpoint: {e}")
        if 'response' in locals():
            print(f"Response text: {response.text[:500]}")
        return False
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return False
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return False

# --- Function to Load Questions ---
def load_questions(filename: str) -> list:
    """Loads questions from a local JSON file."""
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            questions_data = json.load(f)
        print(f"Successfully loaded {len(questions_data)} questions from '{filename}'.")
        return questions_data
    except FileNotFoundError:
        print(f"Error: Questions file '{filename}' not found.")
        return []
    except json.JSONDecodeError:
        print(f"Error: Could not decode JSON from '{filename}'.")
        return []
    except Exception as e:
        print(f"An unexpected error occurred loading questions: {e}")
        return []

# --- Function to Instantiate Agent ---
def create_agent():
    """Instantiates the CodeAgent with configuration similar to app.py."""
    try:
        # Load prompts
        with open(PROMPTS_FILE, 'r') as stream:
            prompt_templates = yaml.safe_load(stream)
    except FileNotFoundError:
        print(f"Error: Prompts file '{PROMPTS_FILE}' not found. Using default prompts.")
        prompt_templates = None  # Or handle differently
    except yaml.YAMLError as e:
        print(f"Error parsing prompts file '{PROMPTS_FILE}': {e}. Using default prompts.")
        prompt_templates = None

    # Configure model
    model = HfApiModel(
        max_tokens=2096,
        temperature=0.5,
        model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
        # custom_role_conversions=None,  # Optional, kept default
        token=hf_token,
    )

    # Create agent instance
    try:
        agent = CodeAgent(
            model=model,
            tools=[
                FinalAnswerTool(),
                DuckDuckGoSearchTool(),
                VisitWebpageTool(),
            ],
            max_steps=6,
            verbosity_level=1,  # Set higher (e.g., 2 or 3) to potentially see reasoning in stdout
            # grammar=None,  # Optional, kept default
            # planning_interval=None,  # Optional, kept default
            name="SmolAgentTester",
            description="An AI coding assistant for testing.",
            prompt_templates=prompt_templates,
        )
        print("CodeAgent instantiated successfully.")
        return agent
    except Exception as e:
        print(f"Error instantiating CodeAgent: {e}")
        return None

# --- Main Execution Logic ---
if __name__ == "__main__":
    print("Starting test script...")

    # Step 1: Fetch and save questions
    if not fetch_and_save_questions(QUESTIONS_URL, QUESTIONS_FILE):
        print("Failed to fetch questions. Exiting.")
        exit(1)

    # Step 2: Load questions
    all_questions = load_questions(QUESTIONS_FILE)
    if not all_questions:
        print("Failed to load questions. Exiting.")
        exit(1)

    # Step 3: Randomly pick 2 questions
    if len(all_questions) < 2:
        print("Warning: Fewer than 2 questions available. Testing with all available questions.")
        selected_questions = all_questions
    else:
        selected_questions = random.sample(all_questions, 2)
    print(f"\nSelected {len(selected_questions)} questions for testing:")
    pprint.pprint(selected_questions)
    print("-" * 50)

    # Step 4: Instantiate agent
    agent = create_agent()
    if agent is None:
        print("Failed to create agent. Exiting.")
        exit(1)

    # Step 5: Run agent and log results
    print(f"Running agent on {len(selected_questions)} questions...")
    results_log = []
    # Clear or create the log file
    with open(ANSWERS_LOG_FILE, 'w', encoding='utf-8') as log_f:
        pass  # Just to clear the file initially

    for item in selected_questions:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        print(f"\n--- Running Task ID: {task_id} ---")
        print(f"Question: {question_text}")
        try:
            # Run the agent via its documented .run() entry point.
            # Note: The agent call might print its own reasoning steps depending on verbosity
            model_answer = agent.run(question_text)  # This now holds the CONCISE answer from FinalAnswerTool
            print(f"\nAgent Final Answer: {model_answer}")
            # Prepare result for logging
            result = {
                "task_id": task_id,
                "question": question_text,
                "model_answer": model_answer,  # Directly use the concise answer
                # "reasoning_trace": "TODO"  # Add if agent provides trace separately
            }
            results_log.append(result)
            # Append result to log file (JSON Lines format)
            with open(ANSWERS_LOG_FILE, 'a', encoding='utf-8') as log_f:
                json.dump(result, log_f, ensure_ascii=False)
                log_f.write('\n')
        except Exception as e:
            print(f"\nAGENT ERROR on task {task_id}: {e}")
            # Optionally log errors too
            error_result = {"task_id": task_id, "model_answer": f"AGENT_ERROR: {e}"}
            results_log.append(error_result)
            with open(ANSWERS_LOG_FILE, 'a', encoding='utf-8') as log_f:
                json.dump(error_result, log_f, ensure_ascii=False)
                log_f.write('\n')
        print("-" * 50)

    print(f"\nTest script finished. {len(results_log)} results logged to '{ANSWERS_LOG_FILE}'.")
    print("Summary of results:")
    pprint.pprint(results_log)

# Ensure prompts.yaml and .env exist in the same directory or adjust paths.
# Ensure necessary packages are installed: pip install requests pyyaml python-dotenv smolagents
# (pprint is part of the Python standard library, so no separate install is needed.)
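# A minimal example .env is sketched below; the token value is a placeholder, and
# API_URL is optional since the script falls back to the default scoring URL above.
# HUGGINGFACE_TOKEN=hf_xxxxxxxxxxxxxxxxxxxx
# API_URL=https://agents-course-unit4-scoring.hf.space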
# ... rest of the script to be added ...