import gradio as gr
from huggingface_hub import InferenceClient
import time

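# Inference client pointed at the lambdaindie/lambdai model.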
client = InferenceClient("lambdaindie/lambdai")

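# Custom styling: JetBrains Mono everywhere, dark background, and a pulsing
# box (.markdown-think) used to display the model's intermediate reasoning.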
css = """
@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono&display=swap');

* {
    font-family: 'JetBrains Mono', monospace !important;
}

body {
    background-color: #111;
    color: #e0e0e0;
}

.markdown-think {
    background-color: #1e1e1e;
    border-left: 4px solid #555;
    padding: 10px;
    margin-bottom: 8px;
    font-style: italic;
    white-space: pre-wrap;
    animation: pulse 1.5s infinite ease-in-out;
}

@keyframes pulse {
    0% { opacity: 0.6; }
    50% { opacity: 1.0; }
    100% { opacity: 0.6; }
}
"""

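# Two-pass generation: first stream a step-by-step reasoning pass (rendered in
# the pulsing .markdown-think box), then stream a final answer conditioned on
# that reasoning.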
def respond(message, history, system_message, max_tokens, temperature, top_p):
    messages = [{"role": "system", "content": system_message}] if system_message else []

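    # Replay the chat history, which arrives as (user, assistant) pairs in the
    # default tuple-style ChatInterface format.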
    for user, assistant in history:
        if user:
            messages.append({"role": "user", "content": user})
        if assistant:
            messages.append({"role": "assistant", "content": assistant})

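    # First pass: ask the model to reason step-by-step before answering.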
    thinking_prompt = messages + [{
        "role": "user",
        "content": f"{message}\n\nThink step-by-step before answering."
    }]

    reasoning = ""
    yield '<div class="markdown-think">Thinking...</div>'

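    # Stream the reasoning pass, re-rendering the styled div as tokens arrive.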
    for chunk in client.chat_completion(
        thinking_prompt,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content or ""
        reasoning += token
        styled_thought = f'<div class="markdown-think">{reasoning.strip()}</div>'
        yield styled_thought

    time.sleep(0.5)

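    # Second pass: hand the model its own reasoning back as an assistant turn
    # and ask for the final answer.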
    final_prompt = messages + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": reasoning.strip()},
        {"role": "user", "content": "Now answer based on your reasoning above."}
    ]

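    # Stream the final answer; each yield replaces the currently displayed
    # message, so the reasoning box is swapped out for the answer.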
    final_answer = ""
    for chunk in client.chat_completion(
        final_prompt,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content or ""
        final_answer += token
        yield final_answer.strip()

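# Wire the generator into a ChatInterface; the extra controls are exposed as
# additional inputs and forwarded to respond() in order.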
demo = gr.ChatInterface(
    fn=respond,
    title="λambdAI",
    theme=gr.themes.Base(),
    css=css,
    additional_inputs=[
        gr.Textbox(value="You are a concise, logical AI that explains its reasoning clearly before answering.",
                   label="System Message"),
        gr.Slider(64, 2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p")
    ]
)

if __name__ == "__main__":
    demo.launch()