import os
import urllib.request

# Download the dataset only if it is not already present locally
dataset_url = "https://huggingface.co/datasets/bitext/Bitext-customer-support-llm-chatbot-training-dataset/resolve/main/Bitext_Sample_Customer_Support_Training_Dataset_27K__Labeled.csv"
dataset_file = "bitext_dataset.csv"

if not os.path.exists(dataset_file):
    print("Downloading Bitext dataset...")
    urllib.request.urlretrieve(dataset_url, dataset_file)
    print("Download complete.")


import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

# Load the fine-tuned TinyLlama model from the Hugging Face Hub
model_name = "your-username/tinyllama-qlora-support-bot"  # 🔁 Replace with your actual HF repo name

# Use the GPU with FP16 when available; otherwise fall back to FP32 on the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)

# Pipeline for response generation
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=0 if device == "cuda" else -1,
)

def chatbot(message, history):
    # Wrap the user message in the same instruction format used during fine-tuning
    prompt = f"### Instruction:\n{message}\n\n### Response:\n"
    output = generator(prompt, max_new_tokens=256, do_sample=True, temperature=0.7)
    # Keep only the text generated after the response marker
    response = output[0]["generated_text"].split("### Response:\n")[-1].strip()
    return response
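
# Quick manual test of the handler (illustrative; bypasses the Gradio UI and the
# example question is hypothetical):
# print(chatbot("How do I reset my password?", []))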

interface = gr.ChatInterface(fn=chatbot, title="🦙 LLaMA Support Chatbot", theme="soft")

if __name__ == "__main__":
    interface.launch()
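    # Tip: launch(share=True) creates a temporary public link when testing locally.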