Commit 5420626
Parent(s): 3e1e43f
update

app.py CHANGED
@@ -16,7 +16,6 @@ from flask import Flask, render_template, redirect, url_for, flash, request, jso
 from flask_login import LoginManager, login_required, current_user
 from werkzeug.utils import secure_filename
 import sys
-import json
 from datetime import datetime

 # Adjust sys.path for import flexibility
@@ -75,65 +74,57 @@ _hf_model = None
 _hf_tokenizer = None

 def init_hf_model() -> None:
-    """Initialise the Hugging Face conversational model and tokenizer.
-
-    Loading large Transformer models can be expensive. This helper ensures
-    that we only perform the download and model initialisation once. On
-    subsequent calls the function returns immediately if the model and
-    tokenizer are already loaded. The model is moved to GPU if one is
-    available; otherwise it will run on the CPU. Any import of heavy
-    dependencies such as ``transformers`` or ``torch`` is performed inside
-    this function to keep the global import section lightweight.
-    """
+    """Initialise the Hugging Face conversational model and tokenizer."""
     global _hf_model, _hf_tokenizer
     if _hf_model is not None and _hf_tokenizer is not None:
         return
-
+
     from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
     import torch

-
-    # fallback to CPU. The application will run correctly on CPU-only
-    # systems albeit with higher latency.
+    model_name = "facebook/blenderbot-400M-distill"
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

-
-
-    # ``HF_HOME`` / ``TRANSFORMERS_CACHE`` which are set at the top of
-    # this file to a writable temporary directory.
-    tokenizer = AutoTokenizer.from_pretrained(HF_MODEL_NAME)
-    model = AutoModelForSeq2SeqLM.from_pretrained(HF_MODEL_NAME)
-    model.to(device)
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

     _hf_model = model
     _hf_tokenizer = tokenizer

-
-
-
-
-# the OpenAI client is performed in ``get_chatbot_response()`` to avoid
-# unintentional import side effects at module import time.
-_chatbot_embedder = None
-_chatbot_collection = None
-
-def init_hf_model() -> None:
-    """Initialise the Hugging Face conversational model and tokenizer."""
-    global _hf_model, _hf_tokenizer
-    if _hf_model is not None and _hf_tokenizer is not None:
+def init_chatbot() -> None:
+    """Initialise the Chroma vector DB with chatbot.txt content."""
+    global _chatbot_embedder, _chatbot_collection
+    if _chatbot_embedder is not None and _chatbot_collection is not None:
         return
+
+    from langchain.text_splitter import RecursiveCharacterTextSplitter
+    from sentence_transformers import SentenceTransformer
+    import chromadb
+    from chromadb.config import Settings
+    import os

-
-    import torch
+    os.makedirs(CHATBOT_DB_DIR, exist_ok=True)

-
-
+    with open(CHATBOT_TXT_PATH, encoding="utf-8") as f:
+        text = f.read()
+    splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
+    docs = [doc.strip() for doc in splitter.split_text(text)]

-
-
+    embedder = SentenceTransformer("all-MiniLM-L6-v2")
+    embeddings = embedder.encode(docs, show_progress_bar=False, batch_size=32)

-
-
+    client = chromadb.Client(Settings(persist_directory=CHATBOT_DB_DIR, anonymized_telemetry=False))
+    collection = client.get_or_create_collection("chatbot")
+    ids = [f"doc_{i}" for i in range(len(docs))]
+    try:
+        existing = collection.get(ids=ids[:1])
+        if not existing.get("documents"):
+            raise ValueError("Empty Chroma DB")
+    except Exception:
+        collection.add(documents=docs, embeddings=embeddings, ids=ids)
+
+    _chatbot_embedder = embedder
+    _chatbot_collection = collection


 def get_chatbot_response(query: str) -> str:
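Note: the body of get_chatbot_response() is truncated in this view, so the diff does not show how the lazily initialised globals are consumed. A minimal sketch of the likely generation path, assuming callers go through init_hf_model() first; the helper name generate_reply and the max_new_tokens value are illustrative, not part of the commit:

    # Hypothetical consumer of init_hf_model(); not part of this diff.
    def generate_reply(user_message: str) -> str:
        init_hf_model()  # no-op after the first call
        inputs = _hf_tokenizer(user_message, return_tensors="pt").to(_hf_model.device)
        output_ids = _hf_model.generate(**inputs, max_new_tokens=128)
        return _hf_tokenizer.decode(output_ids[0], skip_special_tokens=True)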
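Likewise, the new init_chatbot() only builds and persists the index; retrieval would embed an incoming query with the same SentenceTransformer and search the collection. A sketch under the assumption that this happens inside get_chatbot_response(); the helper name retrieve_context and the default k=3 are illustrative:

    # Hypothetical retrieval against the collection built by init_chatbot().
    def retrieve_context(query: str, k: int = 3) -> list[str]:
        init_chatbot()  # ensures _chatbot_embedder and _chatbot_collection exist
        query_embedding = _chatbot_embedder.encode([query])[0].tolist()
        results = _chatbot_collection.query(query_embeddings=[query_embedding], n_results=k)
        return results["documents"][0]  # the k chunks closest to the query

One design note on the diff itself: the try/except guard probes the collection for the first chunk id (collection.get(ids=ids[:1])) and only calls collection.add() when that probe comes back empty, so a restart against an already populated persist_directory avoids inserting duplicate documents, though the embeddings are still recomputed on every call.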