import time
import threading
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
import torch

# Load the local model
model_id = "lambdaindie/lambda-1v-1B"  # Replace with another model if desired
tokenizer = AutoTokenizer.from_pretrained(model_id)
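# Half precision halves GPU memory use; CPU inference falls back to float32,
# where float16 is poorly supported.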
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32)
model.to("cuda" if torch.cuda.is_available() else "cpu")
model.eval()

# Styling (dark theme, JetBrains Mono everywhere)
css = """
@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono&display=swap');
* {
    font-family: 'JetBrains Mono', monospace !important;
}
html, body, .gradio-container {
    background-color: #111 !important;
    color: #e0e0e0 !important;
}
textarea, input, button, select {
    background-color: transparent !important;
    color: #e0e0e0 !important;
    border: 1px solid #444 !important;
}
.markdown-think {
    background-color: #1e1e1e;
    border-left: 4px solid #555;
    padding: 10px;
    margin-bottom: 8px;
    font-style: italic;
    white-space: pre-wrap;
    animation: pulse 1.5s infinite ease-in-out;
}
@keyframes pulse {
    0% { opacity: 0.6; }
    50% { opacity: 1.0; }
    100% { opacity: 0.6; }
}
"""

theme = gr.themes.Base(
    primary_hue="gray",
    font=[
        gr.themes.GoogleFont("JetBrains Mono"),
        "monospace"
    ]
).set(
    body_background_fill="#111",
    body_text_color="#e0e0e0",
    button_primary_background_fill="#333",
    button_primary_text_color="#e0e0e0",
    input_background_fill="#222",
    input_border_color="#444",
    block_title_text_color="#fff"
)

# Flag used to stop the streaming loop
stop_signal = False

def stop_stream():
    global stop_signal
    stop_signal = True
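# Note: this flag only stops streaming tokens to the UI; the background
# model.generate() thread keeps running until it finishes on its own.
# A StoppingCriteria sketch that would halt generation itself follows
# the respond() function below.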

def respond(history, system_message, max_tokens, temperature, top_p):
    global stop_signal
    stop_signal = False

    # Build a plain-text prompt from the system message and the chat history;
    # the latest user message is already the last entry in history.
    prompt = ""
    if system_message:
        prompt += f"{system_message}\n\n"

    for msg in history:
        role = msg["role"]
        content = msg["content"]
        if role == "user":
            prompt += f"User: {content}\n"
        elif role == "assistant":
            prompt += f"Assistant: {content}\n"

    prompt += "Assistant:"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Stream tokens as they are produced; skip_prompt drops the echoed input
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
    )

    # Run generation in a background thread so this function can consume
    # the streamer and yield partial output to the UI as it arrives.
    thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    output = ""
    start = time.time()

    for token in streamer:
        if stop_signal:
            break
        output += token
        # Chatbot(type="messages") expects the full message list, not a single dict
        yield history + [{"role": "assistant", "content": output}]

    end = time.time()
    # Chatbot only renders "user"/"assistant" roles, so the timing note is
    # appended as a final assistant message rather than a "system" one.
    yield history + [
        {"role": "assistant", "content": output},
        {"role": "assistant", "content": f"*Thought for {end - start:.1f} seconds*"},
    ]

# Interface
with gr.Blocks(css=css, theme=theme) as app:
    chatbot = gr.Chatbot(label="λ", type="messages")

    with gr.Row():
        msg = gr.Textbox(label="Message")
        send_btn = gr.Button("Send")
        stop_btn = gr.Button("Stop")

    with gr.Accordion("Advanced Settings", open=False):
        system_message = gr.Textbox(label="System Message", value="")
        max_tokens = gr.Slider(64, 2048, value=256, step=1, label="Max Tokens")
        temperature = gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature")
        top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p")

    def user_message_submit(user_msg, chat_history):
        # Append the user turn to the chat history and clear the textbox
        if user_msg:
            chat_history = chat_history + [{"role": "user", "content": user_msg}]
        return "", chat_history

    # The Chatbot component itself holds the message history, so the chained
    # respond() call receives the updated list including the new user turn.
    # (A separate gr.State copy would drift out of sync with the chat display.)
    send_btn.click(fn=user_message_submit, inputs=[msg, chatbot], outputs=[msg, chatbot])\
        .then(fn=respond, inputs=[chatbot, system_message, max_tokens, temperature, top_p], outputs=chatbot)

    stop_btn.click(fn=stop_stream, inputs=[], outputs=[])

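# share=True asks Gradio to create a temporary public URL in addition to the
# local server (useful on Colab; unnecessary when hosted on Spaces).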
app.launch(share=True)