import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# HTML template for custom UI
HTML_TEMPLATE = """
<style>
    body { background: linear-gradient(135deg, #f5f7fa, #c3cfe2); }
    #app-header {
        text-align: center;
        background: rgba(255, 255, 255, 0.8);
        padding: 20px;
        border-radius: 10px;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
        position: relative;
    }
    #app-header h1 { color: #4CAF50; font-size: 2em; margin-bottom: 10px; }
    .concept { position: relative; transition: transform 0.3s; }
    .concept:hover { transform: scale(1.1); }
    .concept img {
        width: 100px;
        border-radius: 10px;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    }
    .concept-description {
        position: absolute;
        bottom: -30px;
        left: 50%;
        transform: translateX(-50%);
        background-color: #4CAF50;
        color: white;
        padding: 5px 10px;
        border-radius: 5px;
        opacity: 0;
        transition: opacity 0.3s;
    }
    .concept:hover .concept-description { opacity: 1; }
    .artifact {
        position: absolute;
        background: rgba(76, 175, 80, 0.1);
        border-radius: 50%;
    }
    .artifact.large { width: 300px; height: 300px; top: -50px; left: -150px; }
    .artifact.medium { width: 200px; height: 200px; bottom: -50px; right: -100px; }
    .artifact.small {
        width: 100px;
        height: 100px;
        top: 50%;
        left: 50%;
        transform: translate(-50%, -50%);
    }
</style>
<div id="app-header">
    <div class="artifact large"></div>
    <div class="artifact medium"></div>
    <div class="artifact small"></div>
    <h1>Llama-3.1-Storm-8B Text Generator</h1>
    <p>Generate text using the Llama-3.1-Storm-8B model by providing a prompt.</p>
    <div style="display: flex; justify-content: center; gap: 20px; margin-top: 20px;">
        <div class="concept">
            <img src="https://raw.githubusercontent.com/huggingface/huggingface.js/main/packages/inference/src/tasks/images/llama.png" alt="Llama">
            <div class="concept-description">Llama Model</div>
        </div>
        <div class="concept">
            <img src="https://raw.githubusercontent.com/huggingface/huggingface.js/main/packages/inference/src/tasks/images/language.png" alt="Language">
            <div class="concept-description">Natural Language Processing</div>
        </div>
        <div class="concept">
            <img src="https://raw.githubusercontent.com/huggingface/huggingface.js/main/packages/inference/src/tasks/images/text-generation.png" alt="Text Generation">
            <div class="concept-description">Text Generation</div>
        </div>
    </div>
</div>
"""

# Load the model and tokenizer
model_name = "akjindal53244/Llama-3.1-Storm-8B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)

# @spaces.GPU requests a ZeroGPU allocation for each call (here up to 120
# seconds); outside a ZeroGPU Space the decorator is a no-op.
@spaces.GPU(duration=120)
def generate_text(prompt, max_length, temperature):
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
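    # Render the conversation with the model's chat template; add_generation_prompt
    # appends the assistant header so the model starts a fresh reply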
    formatted_prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
    
    # The chat template already prepends the BOS token, so skip adding it again
    inputs = tokenizer(formatted_prompt, return_tensors="pt", add_special_tokens=False).to(model.device)
    
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_length,
        do_sample=True,
        temperature=temperature,
        top_k=100,
        top_p=0.95,
        pad_token_id=tokenizer.eos_token_id,  # Llama has no pad token; silences a generate() warning
    )
    
    # Decode only the newly generated tokens, dropping the echoed prompt
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

# Create Gradio interface
iface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(lines=5, label="Prompt"),
        gr.Slider(minimum=1, maximum=500, value=128, step=1, label="Max New Tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
    ],
    outputs=gr.Textbox(lines=10, label="Generated Text"),
    title="Llama-3.1-Storm-8B Text Generation",
    description="Enter a prompt to generate text using the Llama-3.1-Storm-8B model.",
    article=HTML_TEMPLATE
)

iface.launch()
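
# A minimal local sanity check, kept commented out so it doesn't run on import.
# The prompt is a hypothetical example; this assumes a GPU with enough memory
# to hold the 8B model in bfloat16:
#   print(generate_text("Write a haiku about GPUs.", max_length=64, temperature=0.7))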