BasilTh committed
Commit eff99d8 · 1 Parent(s): cca6a14

Deploy updated SLM customer-support chatbot

Files changed (4)
  1. README.md +3 -5
  2. SLM_CService.py +2 -0
  3. app.py +77 -17
  4. requirements.txt +1 -0
README.md CHANGED
@@ -1,11 +1,9 @@
 ---
-license: mit
-title: Customer Support Chatbot
-sdk: gradio
-emoji: 🐨
+title: "Customer Support Chatbot"
+emoji: "🛎"
 colorFrom: blue
 colorTo: purple
+sdk: gradio
 sdk_version: "5.41.1"
 app_file: app.py
-pinned: false
 ---
 
SLM_CService.py CHANGED
@@ -1,3 +1,5 @@
+import os
+os.environ["OMP_NUM_THREADS"] = "1"
 import unsloth  # patch before transformers
 import torch, triton
 from transformers import pipeline, AutoTokenizer
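
Setting `OMP_NUM_THREADS` only takes effect if it happens before the first import that loads the OpenMP runtime (here `unsloth`/`torch`): libgomp reads the variable once, at initialization. A minimal standalone sketch of that ordering constraint (not part of the commit):

```python
import os
os.environ["OMP_NUM_THREADS"] = "1"  # must precede the first OpenMP-linked import

import torch  # libgomp initializes during this import and reads the variable

# torch's intra-op thread pool defaults to OMP_NUM_THREADS, so this prints 1;
# moving the os.environ line below `import torch` would leave the default pool size
print(torch.get_num_threads())
```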
app.py CHANGED
@@ -1,22 +1,82 @@
-import unsloth  # patch Transformers before any other imports
+# app.py
+import os
+# ── suppress libgomp warnings ─────────────────────────────────────────────
+os.environ["OMP_NUM_THREADS"] = "1"
+
+# ── patch Unsloth before transformers ────────────────────────────────────
+import unsloth
+import torch
+import triton
+
+# ── Gradio & model deps ──────────────────────────────────────────────────
 import gradio as gr
-from SLM_CService import chat_with_memory
+from transformers import pipeline, AutoTokenizer, BitsAndBytesConfig
+from peft import PeftModel
+from langchain.memory import ConversationBufferMemory
 
-def reset_chat():
-    """Clears the conversation."""
-    return [], []  # empty history
+# ── Load the fine-tuned QLoRA model ──────────────────────────────────────
+BASE = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+ADAPTER_DIR = "tinyllama-qlora-adapters"  # or wherever you stored your adapters
+
+# "nf4" is the 4-bit quant type QLoRA fine-tunes against;
+# "bnb_dynamic" is not a valid bnb_4bit_quant_type
+bnb_cfg = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_use_double_quant=True,
+)
+
+tokenizer = AutoTokenizer.from_pretrained(ADAPTER_DIR, use_fast=False, local_files_only=True)
+tokenizer.pad_token_id = tokenizer.eos_token_id
 
-with gr.Blocks(css=".gradio-container { max-width: 700px }") as demo:
+# FastLanguageModel.from_pretrained returns a (model, tokenizer) tuple
+model, _ = unsloth.FastLanguageModel.from_pretrained(
+    BASE,
+    load_in_4bit=True,
+    device_map="auto",
+    trust_remote_code=True,
+)
+model = PeftModel.from_pretrained(model, ADAPTER_DIR, local_files_only=True)
+
+# the "conversational" pipeline task was removed from transformers;
+# use text-generation plus the tokenizer's chat template instead
+chat_pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    return_full_text=False,
+)
+GEN_KWARGS = {"max_new_tokens": 128, "do_sample": True, "top_p": 0.9, "temperature": 0.7}
+
+# ── Conversational memory ────────────────────────────────────────────────
+memory = ConversationBufferMemory(
+    memory_key="chat_history",
+    human_prefix="User",
+    ai_prefix="Assistant",
+    return_messages=True,
+)
+
+def respond(user_message, history):
+    # history arrives in Gradio "messages" format: [{"role": ..., "content": ...}]
+    messages = [{"role": m["role"], "content": m["content"]} for m in history]
+    messages.append({"role": "user", "content": user_message})
+    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    reply = chat_pipe(prompt, **GEN_KWARGS)[0]["generated_text"].strip()
+    # mirror the exchange into LangChain memory
+    memory.save_context({"input": user_message}, {"output": reply})
+    messages.append({"role": "assistant", "content": reply})
+    return "", messages
+
+# ── Gradio UI ────────────────────────────────────────────────────────────
+with gr.Blocks() as demo:
     gr.Markdown("# 🛎 Customer Support Chatbot")
-    chatbot = gr.Chatbot()
+    chatbot = gr.Chatbot(type="messages")
     with gr.Row():
-        txt = gr.Textbox(show_label=False, placeholder="Type your message...")
-        sub = gr.Button("Send")
-    sub.click(
-        fn=lambda history, msg: (history + [[history[-1][1] if history else "", chat_with_memory(msg)]], []),
-        inputs=[chatbot, txt],
-        outputs=[chatbot, txt],
-    )
-    gr.Button("Clear").click(fn=reset_chat, outputs=[chatbot, txt])
-
-demo.launch()
+        user_in = gr.Textbox(placeholder="Type your message here…")
+        send = gr.Button("Send")
+        reset = gr.Button("🔄 Reset Chat")
+    send.click(respond, [user_in, chatbot], [user_in, chatbot])
+    reset.click(lambda: ("", []), None, [user_in, chatbot])
+
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)
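
Before pushing a change like this, `respond()` can be smoke-tested from a REPL without the web UI. A sketch under two assumptions: the adapter directory from the diff is present locally, and `demo.launch()` is guarded behind `__name__ == "__main__"` as above, so the import does not start the server:

```python
from app import respond  # importing app builds the tokenizer, model, and pipeline

history = []  # Gradio "messages" format: list of {"role", "content"} dicts
_, history = respond("Hi, my order arrived damaged.", history)
print(history[-1]["content"])  # first assistant reply

_, history = respond("How do I request a replacement?", history)
print(history[-1]["content"])  # second turn is conditioned on the first
```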
 
 
 
requirements.txt CHANGED
@@ -8,3 +8,4 @@ unsloth_zoo
 huggingface_hub
 sentencepiece
 torch
+langchain>=0.0.250
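
Once the Space is rebuilt, the endpoint can be exercised remotely with `gradio_client` (installed as a dependency of `gradio`). The Space id below is a guess from the commit author and title, and `api_name` assumes Gradio's default of naming the endpoint after the callback:

```python
from gradio_client import Client

# hypothetical Space id — substitute the real <user>/<space> pair
client = Client("BasilTh/customer-support-chatbot")

cleared_box, chat_history = client.predict(
    "Hi, I need help with a refund.",  # user_in
    [],                                # chatbot history (messages format)
    api_name="/respond",
)
print(chat_history[-1]["content"])
```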