import gradio as gr
from huggingface_hub import InferenceClient
import time

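# Client for the lambdaindie/lambdai model on the Hugging Face Inference API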
client = InferenceClient("lambdaindie/lambdai")

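# Custom CSS: JetBrains Mono everywhere, a dark palette, and a pulsing box for the reasoning stream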
css = """
@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono&display=swap');

* {
    font-family: 'JetBrains Mono', monospace !important;
}

body {
    background-color: #111;
    color: #e0e0e0;
}

.markdown-think {
    background-color: #1e1e1e;
    border-left: 4px solid #555;
    padding: 10px;
    margin-bottom: 8px;
    font-style: italic;
    white-space: pre-wrap;
    animation: pulse 1.5s infinite ease-in-out;
}

@keyframes pulse {
    0% { opacity: 0.6; }
    50% { opacity: 1.0; }
    100% { opacity: 0.6; }
}
"""

def respond(message, history, system_message, max_tokens, temperature, top_p):
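    # Rebuild the conversation: optional system prompt first, then prior turns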
    messages = [{"role": "system", "content": system_message}] if system_message else []

    for user, assistant in history:
        if user:
            messages.append({"role": "user", "content": user})
        if assistant:
            messages.append({"role": "assistant", "content": assistant})

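    # First pass: ask the model to reason step-by-step before answering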
    thinking_prompt = messages + [{
        "role": "user",
        "content": f"{message}\n\nThink a bit step-by-step before answering."
    }]

    reasoning = ""
    yield '<div class="markdown-think">Thinking...</div>'  # placeholder until the first tokens arrive

    start = time.time()

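    # Stream the reasoning tokens into the pulsing "thinking" box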
    for chunk in client.chat_completion(
        thinking_prompt,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content or ""
        reasoning += token
        styled_thought = f'<div class="markdown-think">{reasoning.strip()}</div>'
        yield styled_thought

    elapsed = time.time() - start

    yield f"""
    <div style="margin-top:12px;padding:8px 12px;background-color:#222;border-left:4px solid #888;
                font-family:'JetBrains Mono', monospace;color:#ccc;font-size:14px;">
        Thought for {elapsed:.1f} seconds
    </div>
    """

    time.sleep(2)  # brief pause so the elapsed-time banner stays visible before the answer streams

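    # Second pass: feed the reasoning back in and request the final answer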
    final_prompt = messages + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": reasoning.strip()},
        {"role": "user", "content": "Now answer based on your reasoning above."}
    ]

    final_answer = ""
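    # Stream the final answer; each yield replaces the reasoning display in the chat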
    for chunk in client.chat_completion(
        final_prompt,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content or ""
        final_answer += token
        yield final_answer.strip()

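# Chat UI: dark monospace styling plus sliders for the generation parameters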
demo = gr.ChatInterface(
    fn=respond,
    title="λambdAI",
    theme=gr.themes.Base(),
    css=css,
    additional_inputs=[
        gr.Textbox(value="", label="System Message"),
        gr.Slider(64, 2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p")
    ]
)

if __name__ == "__main__":
    demo.launch()