import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Load model and tokenizer
model_id = "suayptalha/FastLlama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto"
)
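# Note: device_map="auto" requires the `accelerate` package to be installed, and
# float16 weights assume a GPU is available; on a CPU-only host, torch.float32 is
# usually the safer choice.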
# Llama-family tokenizers ship without a padding token; fall back to EOS so
# generate() does not warn about a missing pad_token_id.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# System prompt
system_prompt = "You are a friendly assistant named FastLlama."
def format_prompt(message: str, history: list):
    """Build a Zephyr-style prompt string from the chat history."""
    prompt = f"<|system|>\n{system_prompt}</s>\n"
    for user_msg, bot_msg in history:
        prompt += f"<|user|>\n{user_msg}</s>\n<|assistant|>\n{bot_msg}</s>\n"
    prompt += f"<|user|>\n{message}</s>\n<|assistant|>\n"
    return prompt
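# The template above uses Zephyr-style <|system|>/<|user|>/<|assistant|> markers with
# </s> terminators. Since FastLlama-3.2-3B-Instruct is a Llama-3.2 derivative, its
# tokenizer usually ships a built-in chat template; an alternative sketch (assuming that
# template is present; format_prompt_from_template is a hypothetical helper) would be:
#
#     def format_prompt_from_template(message: str, history: list) -> str:
#         messages = [{"role": "system", "content": system_prompt}]
#         for user_msg, bot_msg in history:
#             messages.append({"role": "user", "content": user_msg})
#             messages.append({"role": "assistant", "content": bot_msg})
#         messages.append({"role": "user", "content": message})
#         return tokenizer.apply_chat_template(
#             messages, tokenize=False, add_generation_prompt=True
#         )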
def respond(message: str, history: list):
    # Format the prompt with chat history
    full_prompt = format_prompt(message, history)
    # Tokenize input with attention mask
    inputs = tokenizer(
        full_prompt,
        return_tensors="pt",
        padding=True,
        truncation=True
    ).to(model.device)
    # Generate response with attention mask
    output = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=256,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.1,
        do_sample=True,
        pad_token_id=tokenizer.pad_token_id
    )
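    # Note: temperature and top_p only influence generation because do_sample=True;
    # they are ignored under greedy decoding.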
    # Slice off the prompt tokens and decode only the newly generated text,
    # skipping special tokens.
    response = tokenizer.decode(
        output[0][inputs.input_ids.shape[-1]:],
        skip_special_tokens=True
    )
    return response
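# Note: the code above assumes history arrives as a list of (user, assistant) tuples,
# the classic gr.ChatInterface format; if the interface is created with type="messages",
# history is passed as role/content dicts instead and format_prompt() would need adjusting.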
# Create chat interface
chat = gr.ChatInterface(
    fn=respond,
    title="FastLlama-3.2-3B Chat",
    description="Chat with the FastLlama-3.2-3B-Instruct AI assistant",
    examples=[
        ["Explain quantum computing in simple terms"],
        ["Write a poem about artificial intelligence"],
        ["What's the meaning of life?"]
    ],
    cache_examples=False
)
if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable from outside the container
    # (e.g., on Hugging Face Spaces).
    chat.launch(server_name="0.0.0.0")