Spaces:

LamiaYT
/

gaia-llamaindex-agent

Sleeping

App Files Community

LamiaYT committed on Jun 25

Commit

c2e1cfe

1 Parent(s): afe879b

Fix Dockerfile & Gradio compatibility

Browse files

Files changed (2) hide show

agent/local_llm.py +6 -6
app.py +21 -5

agent/local_llm.py CHANGED Viewed

@@ -8,8 +8,8 @@ except ImportError as e:
 class LocalLLM:
     def __init__(self):
-        # Use a chat-compatible model
-        self.model_name = "tiiuae/falcon-rw-1b"
         print(f"Initializing LocalLLM with model: {self.model_name}")
         self.llm = self._create_llama_index_llm()
@@ -30,7 +30,7 @@ class LocalLLM:
             llm = HuggingFaceLLM(
                 model=model,
                 tokenizer=tokenizer,
-                context_window=4096,
                 generate_kwargs={"temperature": 0.7, "max_new_tokens": 256},
                 tokenizer_kwargs={"use_fast": True},
                 device_map="auto" if torch.cuda.is_available() else None
@@ -67,15 +67,15 @@ class LocalLLM:
                 class MockResponse:
                     def __init__(self, text):
                         self.message = type('obj', (object,), {'content': text})
-                return MockResponse("Mock chat response.")
             def complete(self, prompt, **kwargs):
                 class MockCompletion:
                     def __init__(self, text):
                         self.text = text
-                return MockCompletion("Mock completion response.")
         return MockLLM()
     def get_llm(self):
-        return self.llm

 class LocalLLM:
     def __init__(self):
+        # Use a lightweight chat-compatible model that actually exists
+        self.model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
         print(f"Initializing LocalLLM with model: {self.model_name}")
         self.llm = self._create_llama_index_llm()
             llm = HuggingFaceLLM(
                 model=model,
                 tokenizer=tokenizer,
+                context_window=2048,
                 generate_kwargs={"temperature": 0.7, "max_new_tokens": 256},
                 tokenizer_kwargs={"use_fast": True},
                 device_map="auto" if torch.cuda.is_available() else None
                 class MockResponse:
                     def __init__(self, text):
                         self.message = type('obj', (object,), {'content': text})
+                return MockResponse("Mock chat response: I would analyze this question and provide an answer.")
             def complete(self, prompt, **kwargs):
                 class MockCompletion:
                     def __init__(self, text):
                         self.text = text
+                return MockCompletion("Mock completion response: I would analyze this question and provide an answer.")
         return MockLLM()
     def get_llm(self):
+        return self.llm

app.py CHANGED Viewed

@@ -157,16 +157,32 @@ Question: {question_text}
         print(f"🤔 Processing question: {question_text[:50]}...")
-        # Handle both ReAct agent and mock LLM
         if hasattr(agent, 'query'):
             response = agent.query(enhanced_prompt)
         elif hasattr(agent, 'chat'):
-            response = agent.chat([{"role": "user", "content": enhanced_prompt}])
-            response = response.message.content if hasattr(response, 'message') else str(response)
         else:
-            response = "Mock response: I would analyze this question and provide an answer."
-        answer = str(response).strip()
         # Remove common prefixes from the answer
         for prefix in ["FINAL ANSWER:", "Answer:", "The answer is:", "Final answer:"]:

         print(f"🤔 Processing question: {question_text[:50]}...")
+        # FIXED: Use .complete() instead of .chat() to avoid chat template errors
         if hasattr(agent, 'query'):
             response = agent.query(enhanced_prompt)
+        elif hasattr(agent, 'complete'):
+            # Use complete() method for models without chat templates
+            response = agent.complete(enhanced_prompt)
+            answer = response.text if hasattr(response, 'text') else str(response)
         elif hasattr(agent, 'chat'):
+            # Only use chat if it's the MockLLM or a proper chat model
+            try:
+                response = agent.chat([{"role": "user", "content": enhanced_prompt}])
+                answer = response.message.content if hasattr(response, 'message') else str(response)
+            except Exception as chat_error:
+                # Fallback to complete if chat fails
+                print(f"⚠️ Chat method failed, trying complete: {chat_error}")
+                if hasattr(agent, 'complete'):
+                    response = agent.complete(enhanced_prompt)
+                    answer = response.text if hasattr(response, 'text') else str(response)
+                else:
+                    raise chat_error
         else:
+            answer = "Mock response: I would analyze this question and provide an answer."
+        # Clean up the answer if it wasn't already processed above
+        if 'answer' not in locals():
+            answer = str(response).strip()
         # Remove common prefixes from the answer
         for prefix in ["FINAL ANSWER:", "Answer:", "The answer is:", "Final answer:"]: