Spaces:
Sleeping
Sleeping
Fix Dockerfile & Gradio compatibility
Browse files
- agent/local_llm.py +6 -6
- app.py +21 -5
agent/local_llm.py
CHANGED
@@ -8,8 +8,8 @@ except ImportError as e:
|
|
8 |
|
9 |
class LocalLLM:
|
10 |
def __init__(self):
|
11 |
-
# Use a chat-compatible model
|
12 |
-
self.model_name = "
|
13 |
print(f"Initializing LocalLLM with model: {self.model_name}")
|
14 |
self.llm = self._create_llama_index_llm()
|
15 |
|
@@ -30,7 +30,7 @@ class LocalLLM:
|
|
30 |
llm = HuggingFaceLLM(
|
31 |
model=model,
|
32 |
tokenizer=tokenizer,
|
33 |
-
context_window=
|
34 |
generate_kwargs={"temperature": 0.7, "max_new_tokens": 256},
|
35 |
tokenizer_kwargs={"use_fast": True},
|
36 |
device_map="auto" if torch.cuda.is_available() else None
|
@@ -67,15 +67,15 @@ class LocalLLM:
|
|
67 |
class MockResponse:
|
68 |
def __init__(self, text):
|
69 |
self.message = type('obj', (object,), {'content': text})
|
70 |
-
return MockResponse("Mock chat response.")
|
71 |
|
72 |
def complete(self, prompt, **kwargs):
|
73 |
class MockCompletion:
|
74 |
def __init__(self, text):
|
75 |
self.text = text
|
76 |
-
return MockCompletion("Mock completion response.")
|
77 |
|
78 |
return MockLLM()
|
79 |
|
80 |
def get_llm(self):
|
81 |
-
return self.llm
|
|
|
8 |
|
9 |
class LocalLLM:
|
10 |
def __init__(self):
|
11 |
+
# Use a lightweight chat-compatible model that actually exists
|
12 |
+
self.model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
|
13 |
print(f"Initializing LocalLLM with model: {self.model_name}")
|
14 |
self.llm = self._create_llama_index_llm()
|
15 |
|
|
|
30 |
llm = HuggingFaceLLM(
|
31 |
model=model,
|
32 |
tokenizer=tokenizer,
|
33 |
+
context_window=2048,
|
34 |
generate_kwargs={"temperature": 0.7, "max_new_tokens": 256},
|
35 |
tokenizer_kwargs={"use_fast": True},
|
36 |
device_map="auto" if torch.cuda.is_available() else None
|
|
|
67 |
class MockResponse:
|
68 |
def __init__(self, text):
|
69 |
self.message = type('obj', (object,), {'content': text})
|
70 |
+
return MockResponse("Mock chat response: I would analyze this question and provide an answer.")
|
71 |
|
72 |
def complete(self, prompt, **kwargs):
|
73 |
class MockCompletion:
|
74 |
def __init__(self, text):
|
75 |
self.text = text
|
76 |
+
return MockCompletion("Mock completion response: I would analyze this question and provide an answer.")
|
77 |
|
78 |
return MockLLM()
|
79 |
|
80 |
def get_llm(self):
|
81 |
+
return self.llm
|
app.py
CHANGED
@@ -157,16 +157,32 @@ Question: {question_text}
|
|
157 |
|
158 |
print(f"🤔 Processing question: {question_text[:50]}...")
|
159 |
|
160 |
-
#
|
161 |
if hasattr(agent, 'query'):
|
162 |
response = agent.query(enhanced_prompt)
|
|
|
|
|
|
|
|
|
163 |
elif hasattr(agent, 'chat'):
|
164 |
-
|
165 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
166 |
else:
|
167 |
-
|
168 |
|
169 |
-
answer
|
|
|
|
|
170 |
|
171 |
# Remove common prefixes from the answer
|
172 |
for prefix in ["FINAL ANSWER:", "Answer:", "The answer is:", "Final answer:"]:
|
|
|
157 |
|
158 |
print(f"🤔 Processing question: {question_text[:50]}...")
|
159 |
|
160 |
+
# FIXED: Use .complete() instead of .chat() to avoid chat template errors
|
161 |
if hasattr(agent, 'query'):
|
162 |
response = agent.query(enhanced_prompt)
|
163 |
+
elif hasattr(agent, 'complete'):
|
164 |
+
# Use complete() method for models without chat templates
|
165 |
+
response = agent.complete(enhanced_prompt)
|
166 |
+
answer = response.text if hasattr(response, 'text') else str(response)
|
167 |
elif hasattr(agent, 'chat'):
|
168 |
+
# Only use chat if it's the MockLLM or a proper chat model
|
169 |
+
try:
|
170 |
+
response = agent.chat([{"role": "user", "content": enhanced_prompt}])
|
171 |
+
answer = response.message.content if hasattr(response, 'message') else str(response)
|
172 |
+
except Exception as chat_error:
|
173 |
+
# Fallback to complete if chat fails
|
174 |
+
print(f"⚠️ Chat method failed, trying complete: {chat_error}")
|
175 |
+
if hasattr(agent, 'complete'):
|
176 |
+
response = agent.complete(enhanced_prompt)
|
177 |
+
answer = response.text if hasattr(response, 'text') else str(response)
|
178 |
+
else:
|
179 |
+
raise chat_error
|
180 |
else:
|
181 |
+
answer = "Mock response: I would analyze this question and provide an answer."
|
182 |
|
183 |
+
# Clean up the answer if it wasn't already processed above
|
184 |
+
if 'answer' not in locals():
|
185 |
+
answer = str(response).strip()
|
186 |
|
187 |
# Remove common prefixes from the answer
|
188 |
for prefix in ["FINAL ANSWER:", "Answer:", "The answer is:", "Final answer:"]:
|