File size: 4,087 Bytes
c5cf99a
4a5b989
 
 
 
 
 
 
 
816e8cd
4a5b989
816e8cd
4a5b989
 
c5cf99a
30f6dfa
c5cf99a
30f6dfa
816e8cd
 
4a5b989
 
816e8cd
 
 
 
 
c5cf99a
 
816e8cd
 
 
 
00807c3
 
 
 
 
 
 
 
4a5b989
e4739c2
 
 
 
 
 
 
 
 
 
 
816e8cd
4a5b989
 
c5cf99a
4a5b989
 
816e8cd
 
 
 
 
 
 
 
 
 
 
 
 
c5cf99a
816e8cd
 
 
 
 
 
 
4a5b989
816e8cd
 
4a5b989
816e8cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a5b989
 
c5cf99a
816e8cd
2e0c2b5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# app.py (最终确认版 - 使用 gr.Blocks)
import gradio as gr
from langchain.prompts import PromptTemplate
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_community.llms import LlamaCpp
from huggingface_hub import hf_hub_download
import os
import time

# --- 1. Configuration ---
# Directory of the prebuilt FAISS index consumed by load_rag_chain().
VECTOR_STORE_PATH = "vector_store"
# Chinese-optimized BGE embedding model used for retrieval.
EMBEDDING_MODEL = "BAAI/bge-large-zh-v1.5"
# GGUF repo for the CapybaraHermes-2.5-Mistral-7B chat model.
GGUF_MODEL_REPO = "TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF"
# 4-bit (Q4_K_M) quantization: a moderate size/quality trade-off for CPU inference.
GGUF_MODEL_FILE = "capybarahermes-2.5-mistral-7b.Q4_K_M.gguf"

# --- 2. 加载RAG管道 (保持不变) ---
def load_rag_chain():
    """Assemble the RAG pipeline: BGE embeddings -> FAISS retriever -> LlamaCpp LLM.

    Returns:
        A RetrievalQA chain ready to answer queries via ``.invoke({"query": ...})``.

    Raises:
        FileNotFoundError: if the prebuilt FAISS index directory is missing.
    """
    print("开始加载RAG管道...")

    # CPU-only embeddings; normalization makes FAISS scores behave like cosine similarity.
    embedder = HuggingFaceBgeEmbeddings(
        model_name=EMBEDDING_MODEL,
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': True},
    )

    if not os.path.exists(VECTOR_STORE_PATH):
        raise FileNotFoundError(f"错误:向量数据库 '{VECTOR_STORE_PATH}' 不存在!")
    store = FAISS.load_local(
        VECTOR_STORE_PATH, embedder, allow_dangerous_deserialization=True
    )

    # Download (or reuse a cached copy of) the quantized GGUF weights.
    gguf_path = hf_hub_download(
        repo_id=GGUF_MODEL_REPO, filename=GGUF_MODEL_FILE, local_dir="models"
    )
    llm = LlamaCpp(
        model_path=gguf_path,
        n_gpu_layers=0,   # CPU-only inference
        n_batch=512,
        n_ctx=4096,
        f16_kv=True,
        verbose=False,
    )

    # ChatML-style prompt tailored to Mistral-family instruct models.
    prompt_template = """<|im_start|>system
You are a helpful assistant named "粤小智". Answer the user's question in Chinese based on the provided "Context".
If the context is not sufficient, just say: "抱歉,关于您的问题,我的知识库暂时没有相关信息。". Do not make up answers.
Your answer should be clear and step-by-step if it's an operation guide.<|im_end|>
<|im_start|>user
Context:
{context}

Question:
{question}<|im_end|>
<|im_start|>assistant
"""
    qa_prompt = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"]
    )

    # Only return chunks scoring above the threshold, at most three of them.
    doc_retriever = store.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={'score_threshold': 0.3, 'k': 3},
    )

    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=doc_retriever,
        chain_type_kwargs={"prompt": qa_prompt},
    )
    print("✅ RAG管道加载完毕!")
    return chain

# --- 3. Gradio application logic (adapted for gr.Blocks) ---
# Built once at import time (model download + index load happen here);
# shared by every chat request handled by bot().
RAG_CHAIN = load_rag_chain()

def user(user_message, history):
    """Record the new user turn and clear the input box.

    Returns a pair: an empty string (Gradio writes it back to the textbox)
    and a fresh history list ending in ``[user_message, None]`` — the ``None``
    placeholder is filled in later by the ``bot`` generator.
    """
    updated_history = [*history, [user_message, None]]
    return "", updated_history

def bot(history):
    """Stream the assistant's reply for the most recent user turn.

    Gradio generator callback: reads the trailing ``[user_message, None]``
    pair appended by ``user()``, runs the RAG chain, then yields the updated
    history one character at a time to simulate typing.

    Args:
        history: list of ``[user, assistant]`` pairs; mutated in place.

    Yields:
        The same ``history`` list after each appended character.
    """
    user_message = history[-1][0]
    print(f"收到用户消息: '{user_message}'")

    # Run retrieval + generation; fall back to an error string if the chain
    # result lacks the 'result' key.
    result = RAG_CHAIN.invoke({"query": user_message})
    bot_message = result.get('result', "处理出错").strip()

    # Yield once immediately so the placeholder bubble is updated even when
    # the model produces an empty answer (the original yielded only inside
    # the loop, leaving the None placeholder stuck in that case).
    history[-1][1] = ""
    yield history
    for character in bot_message:
        history[-1][1] += character
        time.sleep(0.02)  # brief pause per character for the typing effect
        yield history

    print(f"模型生成回答: '{history[-1][1]}'")

# --- 4. Build and launch the interface (hand-assembled with gr.Blocks) ---
# The CSS rule hides Gradio's default footer.
with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important}") as demo:
    gr.Markdown("# 粤政云服务智能向导 - 我是粤小智 🤖")
    chatbot = gr.Chatbot(
        [],
        elem_id="chatbot",
        label="聊天窗口",
        bubble_full_width=True,
        height=600
    )
    with gr.Row():
        txt = gr.Textbox(
            scale=4,
            show_label=False,
            placeholder="在这里输入您的问题,然后按回车键...",
            container=False,
        )
    
    # Event flow on Enter: user() appends the message and clears the box
    # (queue=False so the UI updates instantly), then bot() streams the reply.
    txt.submit(user, [txt, chatbot], [txt, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )

# queue() is required for streaming (generator) outputs such as the typing effect
demo.queue()
demo.launch()