Final_Assignment_Template

Sleeping

App Files Files Community

Final_Assignment_Template / agent.py

Freddolin

Update agent.py

af82781 verified about 2 months ago

raw

history blame

8.5 kB

	import os
	from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
	import torch # För att kontrollera enheter

	# Importera ditt nya sökverktyg
	from tools.tavily_search import search_tavily

	class GaiaAgent:
	    """Question-answering agent that drives a local causal LM in a tool loop.

	    The model is prompted to follow a Thought / Tool / Observation / Answer
	    pattern.  Tool calls written as
	    ``<TOOL_CODE>search_tavily("...")</TOOL_CODE>`` are executed with
	    ``search_tavily`` and the real tool output is fed back to the model as
	    an observation on the next iteration.
	    """

	    # Markers the model is instructed (in the prompt) to use.
	    _TOOL_OPEN = "<TOOL_CODE>"
	    _TOOL_CLOSE = "</TOOL_CODE>"
	    _ANSWER_MARKER = "Answer:"
	    _SEARCH_PREFIX = "search_tavily("

	    def __init__(self, model_id: str = "google/gemma-2b-it"):
	        """Load the tokenizer and model and build the generation pipeline.

	        Args:
	            model_id: Hugging Face model identifier to load.

	        Raises:
	            RuntimeError: If the tokenizer, model or pipeline cannot be
	                created.  The original exception is chained as the cause.
	        """
	        # The access token is read from the HF_TOKEN environment variable.
	        hf_token = os.getenv("HF_TOKEN")
	        try:
	            print(f"Laddar tokenizer för {model_id}...")
	            self.tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
	            print(f"Laddar modell för {model_id}...")

	            # Only reported for diagnostics; actual placement is left to
	            # device_map="auto" below.
	            device = "cuda" if torch.cuda.is_available() else "cpu"
	            print(f"Använder enhet: {device}")

	            self.model = AutoModelForCausalLM.from_pretrained(
	                model_id,
	                torch_dtype=torch.bfloat16,  # bfloat16 to reduce memory use
	                device_map="auto",
	                token=hf_token,
	            )
	            print("Modell laddad framgångsrikt.")

	            # No explicit `device` argument: the model has already been
	            # placed by device_map="auto".
	            self.text_generator = pipeline(
	                "text-generation",
	                model=self.model,
	                tokenizer=self.tokenizer,
	            )
	            print("Textgenereringspipeline skapad.")
	        except Exception as e:
	            print(f"Fel vid initiering av agent: {e}")
	            # Chain the cause so the original traceback is preserved.
	            raise RuntimeError(f"Fel vid laddning av modell eller tokenizer: {e}") from e

	    def __call__(self, question: str) -> str:
	        """Answer ``question`` by delegating to :meth:`process_task`.

	        Makes an instance usable as a plain callable.
	        """
	        print(f"Agent received question (first 50 chars): {question[:50]}...")
	        result = self.process_task(question)
	        # Truncated so long answers do not flood the logs.
	        print(f"Agent returning answer: {result[:100]}...")
	        return result

	    def _build_prompt(self, task_description: str) -> str:
	        """Return the instruction prompt for ``task_description``.

	        The prompt is assembled from explicit lines so that its text does
	        not depend on how this source file happens to be indented.
	        """
	        lines = [
	            "",
	            "You are a helpful and expert AI assistant with access to a search tool.",
	            "Your task is to carefully and accurately answer questions by using the search tool when necessary.",
	            "Always provide a complete and correct answer based on the information you find.",
	            "",
	            "You must follow a Thought, Tool, Observation, Answer (TTOA) pattern.",
	            "",
	            "Thought: First, carefully consider the task. What information do you need to answer the question? Do you need to use a tool?",
	            'Tool: If you need to search, use the search_tavily tool. The format is: <TOOL_CODE>search_tavily("your search query")</TOOL_CODE>',
	            "Observation: After a tool call, you will receive an observation (the tool's output). This is factual information.",
	            "Answer: Once you have gathered all necessary information, provide your final, concise answer directly.",
	            "",
	            "Your available tools:",
	            "1. search_tavily(query: str): Searches on Tavily and returns relevant results.",
	            "",
	            "Example Interaction:",
	            "Task: What is the capital of France?",
	            "Thought: I need to find the capital of France. I should use the search_tavily tool.",
	            'Tool: <TOOL_CODE>search_tavily("capital of France")</TOOL_CODE>',
	            "Observation: The capital of France is Paris.",
	            "Answer: The capital of France is Paris.",
	            "",
	            "Now, let's start.",
	            "",
	            f"Task: {task_description}",
	            "",
	        ]
	        return "\n".join(lines)

	    def _find_tool_call(self, text: str):
	        """Return ``(start, end)`` of the first complete tool-call span, or None.

	        The closing tag is searched for *after* the opening tag so that a
	        stray closing tag earlier in the text cannot produce a bogus span.
	        """
	        start = text.find(self._TOOL_OPEN)
	        if start == -1:
	            return None
	        close = text.find(self._TOOL_CLOSE, start + len(self._TOOL_OPEN))
	        if close == -1:
	            return None
	        return start, close + len(self._TOOL_CLOSE)

	    def _run_tool_call(self, new_content: str, start: int, end: int) -> str:
	        """Execute the tool call at ``new_content[start:end]``.

	        Returns:
	            The history entry (thought, tool call and the actual
	            observation) to append to the running transcript.  Tool
	            failures are reported to the model as the observation instead
	            of being raised.
	        """
	        # Recover the thought that led to the call: the text before the
	        # tool tag, without any "Thought:" / "Tool:" labels around it.
	        preamble = new_content[:start]
	        if "Thought:" in preamble:
	            preamble = preamble.split("Thought:", 1)[1]
	        thought_part = preamble.split("Tool:", 1)[0].strip()

	        tool_code_section = new_content[start:end]
	        tool_call_str = tool_code_section[
	            len(self._TOOL_OPEN):-len(self._TOOL_CLOSE)
	        ].strip()
	        print(f"Tool call detected: {tool_call_str}")

	        try:
	            if tool_call_str.startswith(self._SEARCH_PREFIX):
	                args = tool_call_str[len(self._SEARCH_PREFIX):].strip()
	                # Drop the closing parenthesis only if it is really there.
	                if args.endswith(")"):
	                    args = args[:-1].strip()
	                # Accept the keyword form search_tavily(query="...") too.
	                if args.startswith("query="):
	                    args = args[len("query="):].strip()
	                query = args.strip('"').strip("'").strip()
	                if query:
	                    # str() so slicing/logging works whatever the tool returns.
	                    tool_output = str(search_tavily(query))
	                    print(f"Tool result: {tool_output[:200]}...")
	                else:
	                    tool_output = "Error: empty search query."
	                    print(f"Error: {tool_output}")
	            else:
	                tool_output = f"Unknown tool: {tool_call_str}"
	                print(f"Error: {tool_output}")
	        except Exception as tool_e:
	            # Best effort: surface the failure to the model as an observation.
	            tool_output = f"Error running tool {tool_call_str}: {tool_e}"
	            print(f"Error: {tool_output}")

	        return f"\n\nThought: {thought_part}\nTool: {tool_code_section}\nObservation: {tool_output}\n"

	    def process_task(self, task_description: str) -> str:
	        """Run the Thought/Tool/Observation/Answer loop for one task.

	        Args:
	            task_description: The question or task to solve.

	        Returns:
	            The text following the model's ``Answer:`` marker, or a
	            fallback message containing the tail of the transcript when no
	            answer is produced within the iteration budget.
	        """
	        prompt = self._build_prompt(task_description)

	        max_iterations = 3
	        current_response_history = ""  # transcript of thoughts and real observations

	        for i in range(max_iterations):
	            # End on "Thought:" to nudge the model into starting its reasoning.
	            full_prompt = prompt + current_response_history + "\n\nThought:"

	            print(f"[{i+1}/{max_iterations}] Generating response with prompt length: {len(full_prompt)}")

	            generated_text = self.text_generator(
	                full_prompt,
	                max_new_tokens=1024,
	                num_return_sequences=1,
	                pad_token_id=self.tokenizer.eos_token_id,
	                do_sample=True,
	                top_k=50,
	                top_p=0.95,
	                temperature=0.7,
	            )[0]['generated_text']

	            # Keep only what the model added after the prompt.
	            new_content = generated_text[len(full_prompt):].strip()
	            print(f"DEBUG - Full generated_text: \n---START---\n{generated_text}\n---END---")
	            print(f"DEBUG - Extracted new_content: '{new_content}'")

	            answer_pos = new_content.find(self._ANSWER_MARKER)
	            tool_span = self._find_tool_call(new_content)
	            is_last_iteration = i == max_iterations - 1

	            # A tool call that precedes "Answer:" means the answer was built
	            # on an observation the model invented itself.  Run the real
	            # tool and let the model answer from the real observation - but
	            # only while another iteration remains to make use of it.
	            run_tool = tool_span is not None and (
	                answer_pos == -1
	                or (tool_span[0] < answer_pos and not is_last_iteration)
	            )

	            if run_tool:
	                current_response_history += self._run_tool_call(new_content, *tool_span)
	            elif answer_pos != -1:
	                final_answer = new_content[answer_pos + len(self._ANSWER_MARKER):].strip()
	                print(f"Final answer from model:\n{final_answer}")
	                return final_answer
	            else:
	                # Neither a tool call nor an answer: keep the text so the
	                # model can continue its line of thought.
	                current_response_history += f"\n\nThought: {new_content}\n"
	                print(f"Model generated non-tool/non-answer content. Appending: {new_content[:100]}...")

	        # Iteration budget exhausted without a final answer.
	        return "Agent could not complete the task within the allowed iterations. Latest relevant content: " + \
	            (current_response_history[-500:] if current_response_history else "No meaningful content generated.")