Spaces:

sagar007
/

lama_storm_8b

Sleeping

App Files Files Community

lama_storm_8b / app.py

sagar007

Update app.py

7e9dd79 verified about 1 year ago

raw

history blame

2.53 kB

	import gradio as gr
	import spaces
	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM

	# Static HTML/CSS banner for the demo page.
	# The <style> section defines three rules: `.llama-image` (a flex container
	# that centers its children horizontally), `.llama-image img` (width cap,
	# rounded corners, drop shadow) and `.llama-description` (bold, centered
	# caption). The markup that follows is the image with its caption, a heading,
	# and a one-paragraph introduction.
	HTML_TEMPLATE = """
	<style>
	.llama-image {
	display: flex;
	justify-content: center;
	margin-bottom: 20px;
	}
	.llama-image img {
	max-width: 300px;
	border-radius: 10px;
	box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
	}
	.llama-description {
	text-align: center;
	font-weight: bold;
	margin-top: 10px;
	}
	</style>
	<div class="llama-image">
	<img src="https://cdn-uploads.huggingface.co/production/uploads/64c75c1237333ccfef30a602/tmOlbERGKP7JSODa6T06J.jpeg" alt="Llama">
	<div class="llama-description">Llama-3.1-Storm-8B Model</div>
	</div>
	<h1>Llama-3.1-Storm-8B Text Generation</h1>
	<p>Generate text using the powerful Llama-3.1-Storm-8B model. Enter a prompt and let the AI create!</p>
	"""

	# Checkpoint identifier shared by the tokenizer and the model below.
	model_name = "akjindal53244/Llama-3.1-Storm-8B"

	# Both objects are created once, at import time, and reused by every request.
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	# Weights are requested in bfloat16; device placement is delegated via device_map="auto".
	model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")

	@spaces.GPU(duration=120)
	def generate_text(prompt: str, max_length: int, temperature: float) -> str:
	    """Generate an assistant reply to ``prompt`` using the module-level model.

	    The prompt is wrapped in a two-message chat (a fixed system message plus
	    the user message), rendered with the tokenizer's chat template, and
	    completed by sampling (``top_k=100``, ``top_p=0.95``).

	    Args:
	        prompt: Text of the user message.
	        max_length: Maximum number of *new* tokens to generate; forwarded as
	            ``max_new_tokens`` (the prompt itself is not counted).
	        temperature: Sampling temperature forwarded to ``model.generate``.

	    Returns:
	        The decoded continuation only; the prompt tokens are sliced off and
	        special tokens are skipped.
	    """
	    messages = [
	        {"role": "system", "content": "You are a helpful assistant."},
	        {"role": "user", "content": prompt},
	    ]
	    formatted_prompt = tokenizer.apply_chat_template(
	        messages,
	        add_generation_prompt=True,
	        tokenize=False,
	    )

	    # The chat template has already written the special tokens the prompt
	    # needs, so the tokenizer must not add its own on top (that would
	    # duplicate e.g. the BOS token at the start of the sequence).
	    inputs = tokenizer(
	        formatted_prompt,
	        return_tensors="pt",
	        add_special_tokens=False,
	    ).to(model.device)
	    prompt_token_count = inputs["input_ids"].shape[1]

	    outputs = model.generate(
	        **inputs,
	        # UI slider values may arrive as floats; a token budget must be an int.
	        max_new_tokens=int(max_length),
	        do_sample=True,
	        temperature=temperature,
	        top_k=100,
	        top_p=0.95,
	    )

	    # Keep only the tokens produced after the prompt.
	    generated_tokens = outputs[0][prompt_token_count:]
	    return tokenizer.decode(generated_tokens, skip_special_tokens=True)

	# Create Gradio interface
	# The HTML banner is attached through `article`, which Interface accepts as
	# Markdown/HTML and renders below the input and output components.
	iface = gr.Interface(
	    fn=generate_text,
	    inputs=[
	        gr.Textbox(lines=5, label="Prompt"),
	        gr.Slider(minimum=1, maximum=500, value=128, step=1, label="Max Length"),
	        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
	    ],
	    outputs=gr.Textbox(lines=10, label="Generated Text"),
	    title="Llama-3.1-Storm-8B Text Generation",
	    description="Enter a prompt to generate text using the Llama-3.1-Storm-8B model.",
	    article=HTML_TEMPLATE,
	    css=".gradio-container {max-width: 800px; margin: auto;}",
	)

	# `launch()` only takes server/runtime options. It has no `additional_inputs`
	# keyword, so passing one fails at startup; extra page content belongs on the
	# Interface itself (see `article` above).
	iface.launch()