BasilTh committed
Commit eff99d8 · 1 Parent(s): cca6a14

Deploy updated SLM customer-support chatbot

Files changed (4)
  1. README.md +3 -5
  2. SLM_CService.py +2 -0
  3. app.py +77 -17
  4. requirements.txt +1 -0
README.md CHANGED
@@ -1,11 +1,9 @@
 ---
-license: mit
-title: Customer Support Chatbot
-sdk: gradio
-emoji: 🐨
+title: "Customer Support Chatbot"
+emoji: "🛎"
 colorFrom: blue
 colorTo: purple
+sdk: gradio
 sdk_version: "5.41.1"
 app_file: app.py
-pinned: false
 ---
 
SLM_CService.py CHANGED
@@ -1,3 +1,5 @@
+import os
+os.environ["OMP_NUM_THREADS"] = "1"
 import unsloth  # patch before transformers
 import torch, triton
 from transformers import pipeline, AutoTokenizer
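
Setting `OMP_NUM_THREADS` only takes effect if it happens before the first import that loads the OpenMP runtime (here `unsloth`/`torch`): libgomp reads the variable once, at initialization. A minimal standalone sketch of that ordering constraint (not part of the commit):

```python
import os
os.environ["OMP_NUM_THREADS"] = "1"  # must precede the first OpenMP-linked import

import torch  # libgomp initializes during this import and reads the variable

# torch's intra-op thread pool defaults to OMP_NUM_THREADS, so this prints 1;
# moving the os.environ line below `import torch` would leave the default pool size
print(torch.get_num_threads())
```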
app.py CHANGED
@@ -1,22 +1,82 @@
-import unsloth  # patch Transformers before any other imports
+# app.py
+import os
+# ── suppress libgomp warnings ─────────────────────────────────────────────
+os.environ["OMP_NUM_THREADS"] = "1"
+
+# ── patch Unsloth before transformers ────────────────────────────────────
+import unsloth
+import torch
+import triton
+
+# ── Gradio & model deps ──────────────────────────────────────────────────
 import gradio as gr
-from SLM_CService import chat_with_memory
+from transformers import pipeline, AutoTokenizer, BitsAndBytesConfig
+from peft import PeftModel
+from langchain.memory import ConversationBufferMemory
 
-def reset_chat():
-    """Clears the conversation."""
-    return [], []  # empty history
+# ── Load the fine-tuned QLoRA model ──────────────────────────────────────
+BASE = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+ADAPTER_DIR = "tinyllama-qlora-adapters"  # or wherever you stored your adapters
+
+# "nf4" is the 4-bit quant type QLoRA fine-tunes against;
+# "bnb_dynamic" is not a valid bnb_4bit_quant_type
+bnb_cfg = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_use_double_quant=True,
+)
+
+tokenizer = AutoTokenizer.from_pretrained(ADAPTER_DIR, use_fast=False, local_files_only=True)
+tokenizer.pad_token_id = tokenizer.eos_token_id
 
-with gr.Blocks(css=".gradio-container { max-width: 700px }") as demo:
+# FastLanguageModel.from_pretrained returns a (model, tokenizer) tuple
+model, _ = unsloth.FastLanguageModel.from_pretrained(
+    BASE,
+    load_in_4bit=True,
+    device_map="auto",
+    trust_remote_code=True,
+)
+model = PeftModel.from_pretrained(model, ADAPTER_DIR, local_files_only=True)
+
+# the "conversational" pipeline task was removed from transformers;
+# use text-generation plus the tokenizer's chat template instead
+chat_pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    return_full_text=False,
+)
+GEN_KWARGS = {"max_new_tokens": 128, "do_sample": True, "top_p": 0.9, "temperature": 0.7}
+
+# ── Conversational memory ────────────────────────────────────────────────
+memory = ConversationBufferMemory(
+    memory_key="chat_history",
+    human_prefix="User",
+    ai_prefix="Assistant",
+    return_messages=True,
+)
+
+def respond(user_message, history):
+    # history arrives in Gradio "messages" format: [{"role": ..., "content": ...}]
+    messages = [{"role": m["role"], "content": m["content"]} for m in history]
+    messages.append({"role": "user", "content": user_message})
+    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    reply = chat_pipe(prompt, **GEN_KWARGS)[0]["generated_text"].strip()
+    # mirror the exchange into LangChain memory
+    memory.save_context({"input": user_message}, {"output": reply})
+    messages.append({"role": "assistant", "content": reply})
+    return "", messages
+
+# ── Gradio UI ────────────────────────────────────────────────────────────
+with gr.Blocks() as demo:
     gr.Markdown("# 🛎 Customer Support Chatbot")
-    chatbot = gr.Chatbot()
+    chatbot = gr.Chatbot(type="messages")
     with gr.Row():
-        txt = gr.Textbox(show_label=False, placeholder="Type your message...")
-        sub = gr.Button("Send")
-    sub.click(
-        fn=lambda history, msg: (history + [[history[-1][1] if history else "", chat_with_memory(msg)]], []),
-        inputs=[chatbot, txt],
-        outputs=[chatbot, txt],
-    )
-    gr.Button("Clear").click(fn=reset_chat, outputs=[chatbot, txt])
-
-demo.launch()
+        user_in = gr.Textbox(placeholder="Type your message here…")
+        send = gr.Button("Send")
+        reset = gr.Button("🔄 Reset Chat")
+    send.click(respond, [user_in, chatbot], [user_in, chatbot])
+    reset.click(lambda: ("", []), None, [user_in, chatbot])
+
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)
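
Before pushing a change like this, `respond()` can be smoke-tested from a REPL without the web UI. A sketch under two assumptions: the adapter directory from the diff is present locally, and `demo.launch()` is guarded behind `__name__ == "__main__"` as above, so the import does not start the server:

```python
from app import respond  # importing app builds the tokenizer, model, and pipeline

history = []  # Gradio "messages" format: list of {"role", "content"} dicts
_, history = respond("Hi, my order arrived damaged.", history)
print(history[-1]["content"])  # first assistant reply

_, history = respond("How do I request a replacement?", history)
print(history[-1]["content"])  # second turn is conditioned on the first
```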
 
 
 
requirements.txt CHANGED
@@ -8,3 +8,4 @@ unsloth_zoo
 huggingface_hub
 sentencepiece
 torch
+langchain>=0.0.250
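
Once the Space is rebuilt, the endpoint can be exercised remotely with `gradio_client` (installed as a dependency of `gradio`). The Space id below is a guess from the commit author and title, and `api_name` assumes Gradio's default of naming the endpoint after the callback:

```python
from gradio_client import Client

# hypothetical Space id — substitute the real <user>/<space> pair
client = Client("BasilTh/customer-support-chatbot")

cleared_box, chat_history = client.predict(
    "Hi, I need help with a refund.",  # user_in
    [],                                # chatbot history (messages format)
    api_name="/respond",
)
print(chat_history[-1]["content"])
```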