# lamb / app.py
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
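
# Load the model in full precision on CPU; device_map="auto" requires the
# `accelerate` package to dispatch weights automatically.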
model_name = "lambdaindie/lambda-1v-1B"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="float32",
    low_cpu_mem_usage=True,
    device_map="auto",
)
model.eval()
tokenizer = AutoTokenizer.from_pretrained(model_name)
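
# Build the prompt, run greedy generation, and return only the new text.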
def respond(prompt):
    full_prompt = f"Think step-by-step.\nQuestion: {prompt}\nAnswer:"
    inputs = tokenizer(full_prompt, return_tensors="pt")
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    output = model.generate(
        **inputs,
        max_new_tokens=128,
        do_sample=False,  # greedy decoding, uses less RAM
        pad_token_id=tokenizer.eos_token_id,
    )
    # Decode only the newly generated tokens, skipping the prompt.
    answer = tokenizer.decode(output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
    return answer.strip()

with gr.Blocks(css="""
    .gr-button, .gr-textbox {
        font-family: 'JetBrains Mono', monospace !important;
        font-size: 11px !important;
    }
""") as demo:
    gr.Markdown("## λambdAI — Light CPU Reasoning")
    txt = gr.Textbox(placeholder="Type your question...", show_label=False)
    output = gr.Textbox(label="Answer", lines=6)
    btn = gr.Button("Send")
    btn.click(respond, txt, output)
    txt.submit(respond, txt, output)
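
# share=True asks Gradio for a public link when running locally; on
# Hugging Face Spaces this flag is ignored.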
demo.launch(share=True)