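# λambdAI chat demo: streams a visible "thinking" pass first,
# then generates a final answer conditioned on that reasoning.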
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
import torch
import threading
import time
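
# Load the tokenizer and model once at startup; fp16 halves memory on GPU,
# while CPU falls back to fp32.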
model_id = "lambdaindie/lambdai"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
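
# If the checkpoint does not fit on one device, `device_map="auto"` (with the
# `accelerate` package installed) is a common alternative to an explicit .to(device):
# model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")

# Dark monospace theme; the pulsing .markdown-think box styles the streamed reasoning.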
css = """
@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono&display=swap');
* { font-family: 'JetBrains Mono', monospace !important; }
body { background-color: #111; color: #e0e0e0; }
.markdown-think {
    background-color: #1e1e1e;
    border-left: 4px solid #555;
    padding: 10px;
    margin-bottom: 8px;
    font-style: italic;
    white-space: pre-wrap;
    animation: pulse 1.5s infinite ease-in-out;
}

@keyframes pulse {
    0% { opacity: 0.6; }
    50% { opacity: 1.0; }
    100% { opacity: 0.6; }
}
"""
def respond(message, history, system_message, max_tokens, temperature, top_p):
    # Rebuild the conversation in the chat-template message format.
    messages = [{"role": "system", "content": system_message}] if system_message else []
    for user, assistant in history:
        if user:
            messages.append({"role": "user", "content": user})
        if assistant:
            messages.append({"role": "assistant", "content": assistant})

    # First stage: ask the model to think step by step.
    thinking_prompt = messages + [{"role": "user", "content": f"{message}\n\nThink step-by-step."}]
    prompt = tokenizer.apply_chat_template(thinking_prompt, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    reasoning = ""
    yield '<div class="markdown-think">Thinking...</div>'
    start = time.time()

    # Run generation on a worker thread so tokens can be streamed as they arrive.
    thread = threading.Thread(target=model.generate, kwargs={
        "inputs": inputs["input_ids"],
        "attention_mask": inputs["attention_mask"],
        "max_new_tokens": max_tokens,
        "do_sample": True,  # temperature and top_p have no effect without sampling
        "temperature": temperature,
        "top_p": top_p,
        "streamer": streamer,
    })
    thread.start()

    for token in streamer:
        reasoning += token
        yield f'<div class="markdown-think">{reasoning.strip()}</div>'
    thread.join()

    elapsed = time.time() - start
    yield f"""<div style="margin-top:12px;padding:8px 12px;background-color:#222;border-left:4px solid #888;
    font-family:'JetBrains Mono', monospace;color:#ccc;font-size:14px;">
    Thought for {elapsed:.1f} seconds</div>"""

    # Second stage: final answer conditioned on the reasoning above.
    final_prompt = messages + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": reasoning.strip()},
        {"role": "user", "content": "Now answer based on this."}
    ]
    prompt2 = tokenizer.apply_chat_template(final_prompt, tokenize=False, add_generation_prompt=True)
    inputs2 = tokenizer(prompt2, return_tensors="pt").to(device)
    streamer2 = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    thread2 = threading.Thread(target=model.generate, kwargs={
        "inputs": inputs2["input_ids"],
        "attention_mask": inputs2["attention_mask"],
        "max_new_tokens": max_tokens,
        "do_sample": True,
        "temperature": temperature,
        "top_p": top_p,
        "streamer": streamer2,
    })
    thread2.start()

    final_answer = ""
    for token in streamer2:
        final_answer += token
        yield final_answer.strip()
    thread2.join()
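
# Wire the generator into a streaming chat UI; the sliders below feed the
# sampling parameters straight into respond().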
demo = gr.ChatInterface(
    fn=respond,
    title="λambdAI",
    theme=gr.themes.Base(),
    css=css,
    additional_inputs=[
        gr.Textbox(value="", label="System Message"),
        gr.Slider(64, 2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p"),
    ],
)
if __name__ == "__main__":
demo.launch() |
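
# To run locally (assumes the gradio, transformers, and torch packages are installed):
#   pip install gradio transformers torch
#   python app.py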