# app.py (final confirmed version - using gr.Blocks)
import gradio as gr
from langchain.prompts import PromptTemplate
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_community.llms import LlamaCpp
from huggingface_hub import hf_hub_download
import os
import time

# --- 1. Configuration (unchanged) ---
VECTOR_STORE_PATH = "vector_store"
EMBEDDING_MODEL = "BAAI/bge-large-zh-v1.5"
# Switch to the CapybaraHermes-2.5-Mistral-7B model
GGUF_MODEL_REPO = "TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF"
# Pick a moderately sized 4-bit quantized build
GGUF_MODEL_FILE = "capybarahermes-2.5-mistral-7b.Q4_K_M.gguf"
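# Note (assumption, not from the original): the Q4_K_M file for a 7B model is
# roughly 4.4 GB; check the repo's file listing to confirm it fits the Space.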

# --- 2. Load the RAG pipeline (unchanged) ---
def load_rag_chain():
print("开始加载RAG管道...")
embeddings = HuggingFaceBgeEmbeddings(model_name=EMBEDDING_MODEL, model_kwargs={'device': 'cpu'}, encode_kwargs={'normalize_embeddings': True})
if not os.path.exists(VECTOR_STORE_PATH): raise FileNotFoundError(f"错误:向量数据库 '{VECTOR_STORE_PATH}' 不存在!")
vector_store = FAISS.load_local(VECTOR_STORE_PATH, embeddings, allow_dangerous_deserialization=True)
model_path = hf_hub_download(repo_id=GGUF_MODEL_REPO, filename=GGUF_MODEL_FILE, local_dir="models")
llm = LlamaCpp(model_path=model_path, n_gpu_layers=0, n_batch=512, n_ctx=4096, f16_kv=True, verbose=False)
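    # n_gpu_layers=0 keeps inference entirely on the CPU; n_ctx=4096 sets the
    # context window that the prompt (context + question) and answer must share.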
    # Prompt template optimized for the Mistral-based model
    prompt_template = """<|im_start|>system
You are a helpful assistant named "粤小智". Answer the user's question in Chinese based on the provided "Context".
If the context is not sufficient, just say: "抱歉,关于您的问题,我的知识库暂时没有相关信息。". Do not make up answers.
Your answer should be clear and step-by-step if it's an operation guide.<|im_end|>
<|im_start|>user
Context:
{context}
Question:
{question}<|im_end|>
<|im_start|>assistant
"""
    PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
    retriever = vector_store.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={'score_threshold': 0.3, 'k': 3}
    )
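    # Only chunks scoring at least 0.3 in relevance are returned, capped at
    # k=3; if nothing clears the threshold the {context} slot stays empty and
    # the fallback answer in the system message should kick in.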
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,  # use the threshold retriever created above
        chain_type_kwargs={"prompt": PROMPT}
    )
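    # chain_type="stuff" concatenates all retrieved chunks into a single
    # prompt, which fits the 4096-token window as long as the indexed chunks
    # are reasonably short.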
print("✅ RAG管道加载完毕!")
return qa_chain

# --- 3. Gradio app logic (adapted for gr.Blocks) ---
RAG_CHAIN = load_rag_chain()
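# Building the chain once at module level means the model download and index
# load happen while the Space boots, not during the first user request.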

def user(user_message, history):
    # Append the user message to the chat history and clear the input textbox
    return "", history + [[user_message, None]]

def bot(history):
    # Get the most recent user message
    user_message = history[-1][0]
    print(f"Received user message: '{user_message}'")
    # Call the RAG chain to get an answer
    result = RAG_CHAIN.invoke({"query": user_message})
    bot_message = result.get('result', "处理出错").strip()  # fallback: "处理出错" ("processing failed")
    # Simulate a typing effect
    history[-1][1] = ""
    for character in bot_message:
        history[-1][1] += character
        time.sleep(0.02)  # pause 0.02 s between characters
        yield history
    print(f"Model generated answer: '{history[-1][1]}'")

# --- 4. Build and launch the UI (assembled manually with gr.Blocks) ---
with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important}") as demo:
    gr.Markdown("# 粤政云服务智能向导 - 我是粤小智 🤖")
    chatbot = gr.Chatbot(
        [],
        elem_id="chatbot",
        label="聊天窗口",
        bubble_full_width=True,
        height=600
    )
    with gr.Row():
        txt = gr.Textbox(
            scale=4,
            show_label=False,
            placeholder="在这里输入您的问题,然后按回车键...",
            container=False,
        )
    # Define the event flow triggered when the user presses Enter
    txt.submit(user, [txt, chatbot], [txt, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
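    # queue=False lets the textbox clear instantly, while .then() chains the
    # streaming bot step so its output lands in the same chat turn.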

# queue() is required so the streaming (typing-effect) generator output works
demo.queue()
demo.launch()