|
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
|
model_name = "lambdaindie/lambda-1v-1B" |
|
|
|
model = AutoModelForCausalLM.from_pretrained( |
|
model_name, |
|
torch_dtype="float32", |
|
low_cpu_mem_usage=True, |
|
device_map="auto" |
|
) |
|
model.eval() |
|
|
|
tokenizer = AutoTokenizer.from_pretrained(model_name) |
|
|
|
def respond(prompt): |
|
full_prompt = f"Think step-by-step.\nQuestion: {prompt}\nAnswer:" |
|
inputs = tokenizer(full_prompt, return_tensors="pt", return_attention_mask=False) |
|
inputs = {k: v.to(model.device) for k, v in inputs.items()} |
|
|
|
output = model.generate( |
|
**inputs, |
|
max_new_tokens=128, |
|
do_sample=False, |
|
pad_token_id=tokenizer.eos_token_id, |
|
) |
|
answer = tokenizer.decode(output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True) |
|
return answer.strip() |
|
|
|
with gr.Blocks(css=""" |
|
.gr-button, .gr-textbox { |
|
font-family: 'JetBrains Mono', monospace !important; |
|
font-size: 11px !important; |
|
} |
|
""") as demo: |
|
gr.Markdown("## λambdAI — Light CPU Reasoning") |
|
txt = gr.Textbox(placeholder="Digite sua pergunta...", show_label=False) |
|
output = gr.Textbox(label="Resposta", lines=6) |
|
btn = gr.Button("Enviar") |
|
|
|
btn.click(respond, txt, output) |
|
txt.submit(respond, txt, output) |
|
|
|
demo.launch(share=True) |