import os
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch  # used to check which device is available

# Import the Tavily search tool
from tools.tavily_search import search_tavily
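
# NOTE: search_tavily is assumed to take a query string and return a text blob of
# results, i.e. search_tavily(query: str) -> str. A minimal sketch of such a tool,
# assuming the `tavily-python` client and a TAVILY_API_KEY environment variable:
#
#     from tavily import TavilyClient
#
#     _client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
#
#     def search_tavily(query: str) -> str:
#         return _client.get_search_context(query)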

class GaiaAgent:
    def __init__(self, model_id: str = "google/gemma-2b-it"):
        # Load the tokenizer and model manually; this gives more control.
        try:
            print(f"Loading tokenizer for {model_id}...")
            self.tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.getenv("HF_TOKEN"))
            print(f"Loading model for {model_id}...")

            # Check whether a GPU is available
            device = "cuda" if torch.cuda.is_available() else "cpu"
            print(f"Using device: {device}")

            self.model = AutoModelForCausalLM.from_pretrained(
                model_id,
                torch_dtype=torch.bfloat16,  # bfloat16 roughly halves memory use
                device_map="auto",  # let Accelerate place layers across CPU/GPU
                token=os.getenv("HF_TOKEN")
            )
            print("Modell laddad framgångsrikt.")

            # Create a text-generation pipeline
            self.text_generator = pipeline(
                "text-generation",
                model=self.model,
                tokenizer=self.tokenizer,
                # device=0 if device == "cuda" else -1  # unnecessary (and conflicting) when device_map="auto" is used
            )
            print("Textgenereringspipeline skapad.")

        except Exception as e:
            print(f"Fel vid initiering av agent: {e}")
            raise RuntimeError(f"Fel vid laddning av modell eller tokenizer: {e}")

    def __call__(self, question: str) -> str:
        """
        Makes a GaiaAgent instance callable like a function;
        delegates to process_task to generate the answer.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        result = self.process_task(question)
        print(f"Agent returning answer: {result[:100]}...")  # truncate so logs stay readable
        return result
    
    def process_task(self, task_description: str) -> str:
        # Build a prompt that instructs the model to follow the
        # Thought/Tool/Observation/Answer protocol and to call tools when needed.
        
        prompt = f"""
        You are a helpful and expert AI assistant with access to a search tool.
        Your task is to carefully and accurately answer questions by using the search tool when necessary.
        Always provide a complete and correct answer based on the information you find.

        You must follow a Thought, Tool, Observation, Answer (TTOA) pattern.
        
        **Thought:** First, carefully consider the task. What information do you need to answer the question? Do you need to use a tool?
        **Tool:** If you need to search, use the search_tavily tool. The format is: <TOOL_CODE>search_tavily("your search query")</TOOL_CODE>
        **Observation:** After a tool call, you will receive an observation (the tool's output). This is factual information.
        **Answer:** Once you have gathered all necessary information, provide your final, concise answer directly.

        Your available tools:
        1. search_tavily(query: str): Searches on Tavily and returns relevant results.

        Example Interaction:
        Task: What is the capital of France?
        Thought: I need to find the capital of France. I should use the search_tavily tool.
        Tool: <TOOL_CODE>search_tavily("capital of France")</TOOL_CODE>
        Observation: The capital of France is Paris.
        Answer: The capital of France is Paris.

        Now, let's start.

        Task: {task_description}
        """

        max_iterations = 3
        current_response_history = "" # Ny variabel för att bygga upp historiken

        for i in range(max_iterations):
            # Append "Thought:" to nudge the model into starting its reasoning
            full_prompt = prompt + current_response_history + "\n\nThought:"
            
            print(f"[{i+1}/{max_iterations}] Generating response with prompt length: {len(full_prompt)}")
            
            generated_text = self.text_generator(
                full_prompt, 
                max_new_tokens=1024,  # raise to 2048 if answers get truncated
                num_return_sequences=1,
                pad_token_id=self.tokenizer.eos_token_id,
                do_sample=True,
                top_k=50, top_p=0.95,
                temperature=0.7 
            )[0]['generated_text']

            # Extract only the newly generated text (the model's response after the last "Thought:")
            new_content = generated_text[len(full_prompt):].strip()
            print(f"DEBUG - Full generated_text: \n---START---\n{generated_text}\n---END---")
            print(f"DEBUG - Extracted new_content: '{new_content}'")

            # Check whether the model produced a final 'Answer:'
            if "Answer:" in new_content:
                final_answer = new_content.split("Answer:", 1)[1].strip()
                print(f"Final answer from model:\n{final_answer}")
                return final_answer
            
            elif "<TOOL_CODE>" in new_content and "</TOOL_CODE>" in new_content:
                # The model produced a tool call. Keep only the tool-code string itself;
                # any "Observation:" or "Tool:" text the model invented is discarded
                # until the tool has actually been run.
                
                tool_call_start = new_content.find("<TOOL_CODE>")
                tool_call_end = new_content.find("</TOOL_CODE>") + len("</TOOL_CODE>")
                
                # Try to extract the thought that led to the tool call
                thought_part = ""
                if "Thought:" in new_content[:tool_call_start]:
                    thought_part = new_content.split("Thought:", 1)[1].split("Tool:", 1)[0].strip()
                elif tool_call_start > 0:  # there is free text before the tool code
                    thought_part = new_content[:tool_call_start].strip()

                tool_code_section = new_content[tool_call_start:tool_call_end]
                tool_call_str = tool_code_section.replace("<TOOL_CODE>", "").replace("</TOOL_CODE>", "").strip()

                print(f"Tool call detected: {tool_call_str}")
                
                try:
                    if tool_call_str.startswith("search_tavily("):
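                        # Strip the function-call wrapper and the surrounding quotes to get the raw query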
                        query = tool_call_str[len("search_tavily("):-1].strip().strip('"').strip("'")
                        tool_output = search_tavily(query)
                        print(f"Tool result: {tool_output[:200]}...")
                        
                        # Append the thought, the tool call, and the ACTUAL observation to the history
                        current_response_history += f"\n\nThought: {thought_part}\nTool: {tool_code_section}\nObservation: {tool_output}\n"
                    else:
                        tool_output = f"Unknown tool: {tool_call_str}"
                        print(f"Error: {tool_output}")
                        current_response_history += f"\n\nThought: {thought_part}\nTool: {tool_code_section}\nObservation: {tool_output}\n"
                except Exception as tool_e:
                    tool_output = f"Error running tool {tool_call_str}: {tool_e}"
                    print(f"Error: {tool_output}")
                    current_response_history += f"\n\nThought: {thought_part}\nTool: {tool_code_section}\nObservation: {tool_output}\n"
            else:
                # The model produced neither a tool call nor a final answer.
                # Append what it did generate so it can continue its line of thought.
                current_response_history += f"\n\nThought: {new_content}\n"
                print(f"Model generated non-tool/non-answer content. Appending: {new_content[:100]}...")

        # max_iterations reached without a final answer
        return "Agent could not complete the task within the allowed iterations. Latest relevant content: " + \
               (current_response_history[-500:] if current_response_history else "No meaningful content generated.")
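

# A minimal usage sketch, assuming HF_TOKEN (and the Tavily key used by
# tools.tavily_search) are set in the environment; the question is illustrative only.
if __name__ == "__main__":
    agent = GaiaAgent()
    print(agent("What is the capital of France?"))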