# app.py
import gradio as gr
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    BitsAndBytesConfig,
    pipeline,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
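# Overview: load a small slice of a customer-support dataset, format it into
# instruction/response prompts, fine-tune TinyLlama with QLoRA (4-bit base
# weights + LoRA adapters), then serve the tuned model via a Gradio text UI.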
# Load dataset
dataset = load_dataset(
    "json",
    data_files="https://huggingface.co/datasets/bitext/Bitext-customer-support-llm-chatbot-training-dataset/resolve/main/bitext_customer_support.jsonl",
    split="train[:100]"  # keep the slice small to avoid Space startup timeouts
)

# Wrap each record in an Alpaca-style instruction/response prompt.
# (Renamed from `format` to avoid shadowing the Python builtin.)
def format_example(example):
    return {
        "text": f"### Instruction:\n{example['instruction']}\n\n### Response:\n{example['output']}"
    }

dataset = dataset.map(format_example)
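# A mapped record's "text" field then looks like (hypothetical example):
#   ### Instruction:
#   How do I cancel my order?
#
#   ### Response:
#   I can help you cancel that order. First, open your account page...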
# Tokenizer
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # Llama-family tokenizers ship without a pad token
def tokenize(example):
    tokens = tokenizer(example["text"], truncation=True, padding="max_length", max_length=512)
    tokens["labels"] = tokens["input_ids"].copy()  # causal LM: labels are the inputs themselves
    return tokens
tokenized_dataset = dataset.map(tokenize, batched=True)
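# Note: with padding="max_length", pad tokens also appear in the labels and so
# contribute to the loss. A stricter setup would mask them to -100 (the index
# HF's loss ignores), e.g. per sequence (sketch only; with batched=True,
# input_ids is a list of lists):
#   labels = [t if t != tokenizer.pad_token_id else -100 for t in input_ids]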
# QLoRA setup
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)
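# NF4 with double quantization stores each weight in ~4 bits: a 1.1B-param
# model drops from ~2.2 GB in fp16 to roughly 0.55 GB (plus quantization
# constants). Compute still runs in float16 per bnb_4bit_compute_dtype.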
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config
)
model.gradient_checkpointing_enable()  # trade extra compute for lower activation memory
model = prepare_model_for_kbit_training(model)  # cast norms to fp32, enable input grads for k-bit training
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "v_proj"],
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)
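# Each targeted matrix gains two low-rank factors, A (r x d_in) and B (d_out x r),
# so the extra parameters per matrix are r * (d_in + d_out). With r=8 on only
# q_proj and v_proj, this is a tiny fraction of the 1.1B frozen base weights:
model.print_trainable_parameters()  # prints e.g. "trainable params: ... || trainable%: <1"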
# Training
training_args = TrainingArguments(
    output_dir="trained_model",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    num_train_epochs=1,
    logging_dir="./logs",
    save_strategy="no",
    fp16=True,  # match bnb_4bit_compute_dtype; bf16 is unsupported on T4-class GPUs
    report_to="none",
    optim="paged_adamw_8bit"
)
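# Effective batch size = per_device_train_batch_size * gradient_accumulation_steps
# = 2 * 4 = 8 sequences per optimizer step. paged_adamw_8bit keeps optimizer
# state in paged 8-bit buffers, cutting AdamW's usual ~8 bytes/param of
# fp32 moment state substantially.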
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer
)
trainer.train()
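# Optionally persist just the LoRA adapter (a few MB) for later reloading;
# this reuses the output_dir name from above:
# model.save_pretrained("trained_model")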
# Inference pipeline
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
def chatbot(instruction):
    # Use the same prompt template as training so the adapter sees familiar input.
    prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"
    response = pipe(prompt, max_new_tokens=100)[0]["generated_text"]
    return response[len(prompt):].strip()  # drop the echoed prompt, keep only the completion
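# Quick sanity check (hypothetical input and output):
#   chatbot("How do I reset my password?")
#   -> "You can reset your password from the account settings page..."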
gr.Interface(
    fn=chatbot,
    inputs="text",
    outputs="text",
    title="Fine-Tuned TinyLlama Bitext Chatbot"
).launch()