Spaces:

Athspi
/

Tttt

Sleeping

App Files Files Community

Tttt / app.py

Athspi

Create app.py

c21b225 verified 4 months ago

raw

history blame

1.83 kB

	import gradio as gr
	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM

	# Load model and tokenizer
	model_id = "suayptalha/FastLlama-3.2-3B-Instruct"
	tokenizer = AutoTokenizer.from_pretrained(model_id)
	model = AutoModelForCausalLM.from_pretrained(
	model_id,
	torch_dtype=torch.float16,
	device_map="auto"
	)

	# System prompt
	system_prompt = "You are a friendly assistant named FastLlama."

	def format_prompt(message: str, history: list):
	prompt = f"<\|system\|>\n{system_prompt}</s>\n"
	for user_msg, bot_msg in history:
	prompt += f"<\|user\|>\n{user_msg}</s>\n<\|assistant\|>\n{bot_msg}</s>\n"
	prompt += f"<\|user\|>\n{message}</s>\n<\|assistant\|>\n"
	return prompt

	def respond(message: str, history: list):
	# Format the prompt with chat history
	full_prompt = format_prompt(message, history)

	# Tokenize input
	inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)

	# Generate response
	output = model.generate(
	inputs.input_ids,
	max_new_tokens=256,
	temperature=0.7,
	top_p=0.9,
	repetition_penalty=1.1,
	do_sample=True,
	pad_token_id=tokenizer.eos_token_id
	)

	# Decode response
	response = tokenizer.decode(
	output[0][inputs.input_ids.shape[-1]:],
	skip_special_tokens=True
	)

	return response

	# Create chat interface
	chat = gr.ChatInterface(
	fn=respond,
	title="FastLlama-3.2B Chat",
	description="Chat with FastLlama-3.2-3B-Instruct AI assistant",
	examples=[
	["Explain quantum computing in simple terms"],
	["Write a poem about artificial intelligence"],
	["What's the meaning of life?"]
	],
	cache_examples=False
	)

	# Launch the app
	if __name__ == "__main__":
	chat.launch(server_name="0.0.0.0")