File size: 1,360 Bytes
39fb316
3b6f0da
9faf370
a174543
9faf370
3b6f0da
 
 
 
 
 
 
9faf370
3b6f0da
9faf370
3b6f0da
 
 
 
9faf370
3b6f0da
 
 
 
 
a3382ae
3b6f0da
 
9faf370
 
3b6f0da
403e5d7
 
9faf370
 
3b6f0da
 
 
 
9faf370
3b6f0da
 
9faf370
3b6f0da
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "lambdaindie/lambda-1v-1B"

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="float32",
    low_cpu_mem_usage=True,
    device_map="auto"
)
model.eval()

tokenizer = AutoTokenizer.from_pretrained(model_name)

def respond(prompt):
    full_prompt = f"Think step-by-step.\nQuestion: {prompt}\nAnswer:"
    inputs = tokenizer(full_prompt, return_tensors="pt", return_attention_mask=False)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    output = model.generate(
        **inputs,
        max_new_tokens=128,
        do_sample=False,  # greedy, menos RAM
        pad_token_id=tokenizer.eos_token_id,
    )
    answer = tokenizer.decode(output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
    return answer.strip()

with gr.Blocks(css="""
    .gr-button, .gr-textbox {  
        font-family: 'JetBrains Mono', monospace !important;  
        font-size: 11px !important;  
    }
""") as demo:
    gr.Markdown("## λambdAI — Light CPU Reasoning")
    txt = gr.Textbox(placeholder="Digite sua pergunta...", show_label=False)
    output = gr.Textbox(label="Resposta", lines=6)
    btn = gr.Button("Enviar")

    btn.click(respond, txt, output)
    txt.submit(respond, txt, output)

demo.launch(share=True)