Spaces:

sagar007
/

lama_storm_8b

Sleeping

File size: 2,527 Bytes

985eabb
7e9dd79
985eabb
7e9dd79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc1b568
8b8d0cf
 
 
7e9dd79
 
8b8d0cf
 
 
 
7e9dd79
1f7ba92
02a0e92
1f7ba92
 
02a0e92
1f7ba92
 
7e9dd79
 
 
 
1f7ba92
 
 
 
 
02a0e92
 
7e9dd79
1e235cc
7e9dd79
 
 
 
 
 
 
 
 
 
 
 
 
 
15967e4
7e9dd79

import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Static HTML/CSS snippet for the page banner. It bundles:
#   - a <style> section for the `.llama-image` wrapper (flex, centered),
#     the image inside it (max width, rounded corners, drop shadow) and
#     the `.llama-description` caption (centered, bold);
#   - the markup itself: a remotely hosted image with a caption, followed
#     by an <h1> heading and a one-paragraph introduction.
# The string is plain text here; nothing in it is interpolated.
HTML_TEMPLATE = """
<style>
    .llama-image {
        display: flex;
        justify-content: center;
        margin-bottom: 20px;
    }
    .llama-image img {
        max-width: 300px;
        border-radius: 10px;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    }
    .llama-description {
        text-align: center;
        font-weight: bold;
        margin-top: 10px;
    }
</style>
<div class="llama-image">
    <img src="https://cdn-uploads.huggingface.co/production/uploads/64c75c1237333ccfef30a602/tmOlbERGKP7JSODa6T06J.jpeg" alt="Llama">
    <div class="llama-description">Llama-3.1-Storm-8B Model</div>
</div>
<h1>Llama-3.1-Storm-8B Text Generation</h1>
<p>Generate text using the powerful Llama-3.1-Storm-8B model. Enter a prompt and let the AI create!</p>
"""

# Hub identifier of the checkpoint served by this app; the weights and the
# tokenizer are both resolved from it.
model_name = "akjindal53244/Llama-3.1-Storm-8B"

# Causal-LM weights, requested in bfloat16 with device placement left to
# the library (device_map="auto").
model = AutoModelForCausalLM.from_pretrained(
    model_name, device_map="auto", torch_dtype=torch.bfloat16
)

# Tokenizer matching the checkpoint above (also supplies the chat template
# used when formatting prompts).
tokenizer = AutoTokenizer.from_pretrained(model_name)

@spaces.GPU(duration=120)
def generate_text(prompt, max_length, temperature):
    """Generate an assistant reply to ``prompt`` with the loaded chat model.

    The prompt is wrapped in a two-message conversation (a fixed system
    message plus the user's text), rendered through the tokenizer's chat
    template, and completed with sampling (``top_k=100``, ``top_p=0.95``).

    Args:
        prompt: User message text.
        max_length: Maximum number of *new* tokens to generate; the prompt
            tokens are not counted. Coerced to ``int`` because UI sliders
            may deliver a float.
        temperature: Sampling temperature forwarded to ``model.generate``.

    Returns:
        The decoded completion only: the prompt tokens are sliced off the
        generated sequence and special tokens are skipped.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    formatted_prompt = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, tokenize=False
    )

    # The rendered chat template is expected to already contain the special
    # tokens it needs (e.g. the begin-of-text token), so the tokenizer must
    # not prepend them a second time when encoding the rendered string.
    inputs = tokenizer(
        formatted_prompt,
        return_tensors="pt",
        add_special_tokens=False,
    ).to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=int(max_length),
        do_sample=True,
        temperature=float(temperature),
        top_k=100,
        top_p=0.95,
    )

    # generate() returns prompt + completion; keep only what comes after
    # the prompt tokens.
    prompt_token_count = inputs["input_ids"].shape[1]
    return tokenizer.decode(outputs[0][prompt_token_count:], skip_special_tokens=True)

# Build the Gradio UI: a prompt textbox and two sliders (token budget and
# sampling temperature) feed generate_text, whose result is shown in an
# output textbox.
iface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(lines=5, label="Prompt"),
        gr.Slider(minimum=1, maximum=500, value=128, step=1, label="Max Length"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
    ],
    outputs=gr.Textbox(lines=10, label="Generated Text"),
    title="Llama-3.1-Storm-8B Text Generation",
    description="Enter a prompt to generate text using the Llama-3.1-Storm-8B model.",
    # The custom HTML banner is attached here: `article` accepts HTML or
    # Markdown content and is rendered below the input/output components.
    article=HTML_TEMPLATE,
    css=".gradio-container {max-width: 800px; margin: auto;}",
)

# launch() does not take an `additional_inputs` argument; passing one made
# the call fail with a TypeError before the server could start, so the
# banner is supplied through `article` above instead.
iface.launch()