Spaces:
Paused
Paused
Commit
·
0014151
1
Parent(s):
39a4147
updated
Browse files- app.py +10 -2
- chatbot/chatbot.py +0 -77
app.py
CHANGED
@@ -48,6 +48,10 @@ import json
|
|
48 |
# anywhere without needing to change configuration. The ``chroma_db``
|
49 |
# directory will be created automatically by the Chroma client if it does not
|
50 |
# exist.
|
|
|
|
|
|
|
|
|
51 |
CHATBOT_TXT_PATH = os.path.join(current_dir, 'chatbot', 'chatbot.txt')
|
52 |
CHATBOT_DB_DIR = "/tmp/chroma_db"
|
53 |
# API credentials for Groq. These values mirror those in the standalone
|
@@ -170,8 +174,11 @@ def get_chatbot_response(query: str) -> str:
|
|
170 |
# the application that might use OpenAI for different providers.
|
171 |
openai.api_key = GROQ_API_KEY
|
172 |
openai.api_base = "https://api.groq.com/openai/v1"
|
|
|
|
|
|
|
173 |
|
174 |
-
completion =
|
175 |
model=GROQ_MODEL,
|
176 |
messages=[
|
177 |
{"role": "system", "content": system_prompt},
|
@@ -181,7 +188,8 @@ def get_chatbot_response(query: str) -> str:
|
|
181 |
temperature=0.3,
|
182 |
)
|
183 |
|
184 |
-
return completion
|
|
|
185 |
# Initialize Flask app
|
186 |
app = Flask(
|
187 |
__name__,
|
|
|
48 |
# anywhere without needing to change configuration. The ``chroma_db``
|
49 |
# directory will be created automatically by the Chroma client if it does not
|
50 |
# exist.
|
51 |
+
import shutil
|
52 |
+
|
53 |
+
# Remove any old unwritable Chroma DB path from previous versions
|
54 |
+
shutil.rmtree("/app/chatbot/chroma_db", ignore_errors=True)
|
55 |
CHATBOT_TXT_PATH = os.path.join(current_dir, 'chatbot', 'chatbot.txt')
|
56 |
CHATBOT_DB_DIR = "/tmp/chroma_db"
|
57 |
# API credentials for Groq. These values mirror those in the standalone
|
|
|
174 |
# the application that might use OpenAI for different providers.
|
175 |
openai.api_key = GROQ_API_KEY
|
176 |
openai.api_base = "https://api.groq.com/openai/v1"
|
177 |
+
from openai import OpenAI
|
178 |
+
|
179 |
+
client = OpenAI(api_key=GROQ_API_KEY, base_url="https://api.groq.com/openai/v1")
|
180 |
|
181 |
+
completion = client.chat.completions.create(
|
182 |
model=GROQ_MODEL,
|
183 |
messages=[
|
184 |
{"role": "system", "content": system_prompt},
|
|
|
188 |
temperature=0.3,
|
189 |
)
|
190 |
|
191 |
+
return completion.choices[0].message["content"].strip()
|
192 |
+
|
193 |
# Initialize Flask app
|
194 |
app = Flask(
|
195 |
__name__,
|
chatbot/chatbot.py
DELETED
@@ -1,77 +0,0 @@
|
|
1 |
-
# chatbot/chatbot.py
|
2 |
-
|
3 |
-
from flask import Flask, request, jsonify
|
4 |
-
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
5 |
-
from sentence_transformers import SentenceTransformer
|
6 |
-
import chromadb
|
7 |
-
from chromadb.config import Settings
|
8 |
-
import openai
|
9 |
-
import os
|
10 |
-
|
11 |
-
# === CONFIG ===
|
12 |
-
# Read the Groq API key from the environment instead of committing it to source control; a key that has appeared in a commit should be treated as compromised and rotated.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
|
13 |
-
GROQ_MODEL = "llama3-8b-8192"
|
14 |
-
CHATBOT_TXT_PATH = "./chatbot/chatbot.txt"
|
15 |
-
|
16 |
-
# === Setup ===
|
17 |
-
app = Flask(__name__)
|
18 |
-
openai.api_key = GROQ_API_KEY
|
19 |
-
openai.api_base = "https://api.groq.com/openai/v1"
|
20 |
-
|
21 |
-
# === Load and split chatbot.txt ===
|
22 |
-
text = open(CHATBOT_TXT_PATH, encoding="utf-8").read()
|
23 |
-
splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
|
24 |
-
docs = [doc.strip() for doc in splitter.split_text(text)]
|
25 |
-
|
26 |
-
# === Embed and store in ChromaDB ===
|
27 |
-
embedder = SentenceTransformer("all-MiniLM-L6-v2")
|
28 |
-
embeddings = embedder.encode(docs, show_progress_bar=True, batch_size=32)
|
29 |
-
|
30 |
-
client = chromadb.Client(Settings(persist_directory="./chatbot/chroma_db", anonymized_telemetry=False))
|
31 |
-
collection = client.get_or_create_collection("chatbot")
|
32 |
-
ids = [f"doc_{i}" for i in range(len(docs))]
|
33 |
-
collection.add(documents=docs, embeddings=embeddings, ids=ids)
|
34 |
-
|
35 |
-
# === Core logic ===
|
36 |
-
def get_response(query: str) -> str:
|
37 |
-
query_embedding = embedder.encode([query])[0]
|
38 |
-
results = collection.query(query_embeddings=[query_embedding], n_results=3)
|
39 |
-
retrieved_docs = results['documents'][0]
|
40 |
-
context = "\n".join(retrieved_docs)
|
41 |
-
|
42 |
-
system_prompt = (
|
43 |
-
"You are a helpful assistant for the Codingo website. "
|
44 |
-
"Only answer questions that are directly relevant to the context provided. "
|
45 |
-
"If the user asks anything unrelated, politely refuse by saying: "
|
46 |
-
"\"I'm only trained to answer questions about the Codingo platform.\""
|
47 |
-
)
|
48 |
-
|
49 |
-
user_prompt = f"Context:\n{context}\n\nQuestion: {query}"
|
50 |
-
|
51 |
-
completion = openai.ChatCompletion.create(
|
52 |
-
model=GROQ_MODEL,
|
53 |
-
messages=[
|
54 |
-
{"role": "system", "content": system_prompt},
|
55 |
-
{"role": "user", "content": user_prompt},
|
56 |
-
],
|
57 |
-
max_tokens=200,
|
58 |
-
temperature=0.3,
|
59 |
-
)
|
60 |
-
|
61 |
-
return completion['choices'][0]['message']['content'].strip()
|
62 |
-
|
63 |
-
# === Flask route ===
|
64 |
-
@app.route("/chat", methods=["POST"])
|
65 |
-
def chat():
|
66 |
-
user_input = request.json.get("message", "").strip()
|
67 |
-
if not user_input:
|
68 |
-
return jsonify({"error": "Empty message"}), 400
|
69 |
-
|
70 |
-
try:
|
71 |
-
reply = get_response(user_input)
|
72 |
-
return jsonify({"response": reply})
|
73 |
-
except Exception as e:
|
74 |
-
return jsonify({"error": str(e)}), 500
|
75 |
-
|
76 |
-
if __name__ == "__main__":
|
77 |
-
app.run(port=5001)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|