Update app.py
Browse files
app.py
CHANGED
@@ -11,8 +11,8 @@ import uuid # For generating unique session IDs
|
|
11 |
app = FastAPI()
|
12 |
|
13 |
# === Model Config ===
|
14 |
-
#
|
15 |
-
REPO_ID = "
|
16 |
FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf" # Q4_K_M is a good balance of size and quality
|
17 |
MODEL_DIR = "models"
|
18 |
MODEL_PATH = os.path.join(MODEL_DIR, FILENAME)
|
@@ -185,4 +185,4 @@ async def generate(request: Request):
|
|
185 |
# Remove the last user message from history if generation failed to prevent bad state
|
186 |
if chat_histories[session_id] and chat_histories[session_id][-1]["role"] == "user":
|
187 |
chat_histories[session_id].pop()
|
188 |
-
return {"error": f"Failed to generate response: {e}. Please try again.", "session_id": session_id}, 500
|
|
|
11 |
# FastAPI application instance; route handlers elsewhere in app.py attach to it.
app = FastAPI()
13 |
# === Model Config ===
# Hugging Face repository hosting the GGUF build of TinyLlama (TheBloke's conversion).
REPO_ID = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
# Q4_K_M quantization: a reasonable trade-off between file size and output quality.
FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
# Local directory into which the model file is downloaded.
MODEL_DIR = "models"
MODEL_PATH = os.path.join(MODEL_DIR, FILENAME)
|
|
185 |
# Remove the last user message from history if generation failed to prevent bad state
|
186 |
if chat_histories[session_id] and chat_histories[session_id][-1]["role"] == "user":
|
187 |
chat_histories[session_id].pop()
|
188 |
+
return {"error": f"Failed to generate response: {e}. Please try again.", "session_id": session_id}, 500
|