# app.py
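# NOTE: bitsandbytes 4-bit quantization requires a CUDA GPU; on a CPU-only Space
# this script fails at model load. Assumed requirements.txt (an assumption, not
# part of the original file): gradio, torch, transformers, datasets, peft,
# bitsandbytes, accelerate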
import gradio as gr
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments,
    BitsAndBytesConfig, pipeline
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
# Load dataset
dataset = load_dataset(
    "json",
    data_files="https://huggingface.co/datasets/bitext/Bitext-customer-support-llm-chatbot-training-dataset/resolve/main/bitext_customer_support.jsonl",
    split="train[:100]"  # keep it small to avoid Space startup timeouts
)
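# If the raw-file URL above is unavailable, the same data can be loaded by repo
# id instead (a fallback sketch, assuming the hub dataset's default config):
# dataset = load_dataset(
#     "bitext/Bitext-customer-support-llm-chatbot-training-dataset",
#     split="train[:100]",
# )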
def format_example(example):  # renamed to avoid shadowing the built-in format()
    # The hub version of this dataset names the answer column "response";
    # fall back to "output" in case the JSONL export uses that key instead.
    answer = example.get("response", example.get("output"))
    return {
        "text": f"### Instruction:\n{example['instruction']}\n\n### Response:\n{answer}"
    }

dataset = dataset.map(format_example)
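# Quick sanity check (illustrative): the first formatted example should start
# with "### Instruction:".
print(dataset[0]["text"][:80])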
# Tokenizer
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # model defines no pad token; reuse EOS

def tokenize(example):
    tokens = tokenizer(example["text"], truncation=True, padding="max_length", max_length=512)
    # For causal LM fine-tuning the labels are the inputs; the model shifts them internally.
    tokens["labels"] = tokens["input_ids"].copy()
    return tokens

tokenized_dataset = dataset.map(tokenize, batched=True)
# QLoRA setup: load the base model in 4-bit NF4 with double quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config
)
# prepare_model_for_kbit_training enables gradient checkpointing by default,
# so the explicit call is belt-and-braces rather than required.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "v_proj"],  # attention query/value projections
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)
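# Optional sanity check (PEFT helper): prints how few parameters are trainable.
model.print_trainable_parameters()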
# Training
training_args = TrainingArguments(
    output_dir="trained_model",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,  # effective batch size of 8
    learning_rate=2e-4,
    num_train_epochs=1,
    logging_dir="./logs",
    save_strategy="no",
    fp16=True,  # matches the fp16 4-bit compute dtype; bf16 is unsupported on T4-class GPUs
    report_to="none",
    optim="paged_adamw_8bit"
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer
)
trainer.train()
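# Optional: persist the LoRA adapters; with save_strategy="no" nothing is
# written to disk otherwise (the path is illustrative).
model.save_pretrained("trained_model")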
# Inference pipeline
model.eval()  # switch out of training mode before generation
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

def chatbot(instruction):
    prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"
    response = pipe(prompt, max_new_tokens=100)[0]["generated_text"]
    # generated_text echoes the prompt, so slice it off before returning
    return response[len(prompt):].strip()
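# e.g. chatbot("How do I cancel my order?") -> the fine-tuned model's reply
# (the question is illustrative, not taken from the dataset)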
gr.Interface(
    fn=chatbot,
    inputs="text",
    outputs="text",
    title="Fine-Tuned TinyLlama Bitext Chatbot"
).launch()