mariusjabami committed
Commit 162ed73 · verified · 1 Parent(s): 7515381

Update app.py

Files changed (1)
  1. app.py +51 -95
app.py CHANGED
@@ -2,119 +2,75 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 import time
 
-# Inference API client
 client = InferenceClient("lambdaindie/lambdai")
 
-# Chatbot response function
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
+def respond(message, history, system_message, max_tokens, temperature, top_p):
+    # Build base message history
+    messages = [{"role": "system", "content": system_message}] if system_message else []
 
-    # Append the conversation history
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
+    for user, assistant in history:
+        if user:
+            messages.append({"role": "user", "content": user})
+        if assistant:
+            messages.append({"role": "assistant", "content": assistant})
 
-    messages.append({"role": "user", "content": message})
+    # Phase 1 — Thinking aloud (reasoning step)
+    thinking_prompt = messages + [
+        {
+            "role": "user",
+            "content": f"{message}\n\nThink step-by-step before answering."
+        }
+    ]
 
-    response = ""
+    reasoning = ""
+    yield "**Thinking...**\n```markdown\n```"  # Trigger gray markdown block
 
-    # Show the "thinking" indicator
-    yield "Thinking... 🤔"
-    time.sleep(1)  # Artificial delay to simulate processing time
-
-    # Stream the response from the API client
-    for message in client.chat_completion(
-        messages,
+    for chunk in client.chat_completion(
+        thinking_prompt,
         max_tokens=max_tokens,
         stream=True,
         temperature=temperature,
         top_p=top_p,
     ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
+        token = chunk.choices[0].delta.content or ""
+        reasoning += token
+        yield f"**Thinking...**\n```markdown\n{reasoning.strip()}\n```"
+
+    time.sleep(0.5)  # Optional dramatic pause
 
+    # Phase 2 — Final answer
+    final_prompt = messages + [
+        {"role": "user", "content": message},
+        {"role": "assistant", "content": reasoning.strip()},
+        {"role": "user", "content": "Now answer based on your reasoning above."}
+    ]
+
+    final_answer = ""
+    for chunk in client.chat_completion(
+        final_prompt,
+        max_tokens=max_tokens,
+        stream=True,
+        temperature=temperature,
+        top_p=top_p,
+    ):
+        token = chunk.choices[0].delta.content or ""
+        final_answer += token
+        yield final_answer.strip()
 
-# Gradio interface with a custom chat
 demo = gr.ChatInterface(
     respond,
+    title="LENIRΛ",
+    theme=gr.themes.Base(primary_hue="gray", font=["JetBrains Mono", "monospace"]),
     additional_inputs=[
-        gr.Textbox(value="", label="System message", lines=1, placeholder="System message..."),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
+        gr.Textbox(
+            value="You are a concise, logical AI that explains its reasoning clearly before answering.",
+            label="System Message"
         ),
-    ],
-    title="Lambda-v1-1B",
-    description="Chatbot powered by the Lambdai model",
-    theme="dark",
+        gr.Slider(64, 2048, value=512, step=1, label="Max Tokens"),
+        gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p")
+    ]
 )
 
-# Simple CSS customization for JetBrains Mono and a dark look
-demo.css = """
-* {
-    font-family: 'JetBrains Mono', monospace;
-}
-.gradio-container {
-    background-color: #121212;
-    color: #ffffff;
-    border-radius: 10px;
-    padding: 20px;
-}
-.chatbox {
-    background-color: #181818;
-    border-radius: 8px;
-    color: #f5f5f5;
-    border: 1px solid #333;
-}
-.gr-button {
-    background-color: #4a90e2;
-    color: white;
-    border-radius: 5px;
-    padding: 10px 20px;
-    font-size: 16px;
-}
-.gr-button:hover {
-    background-color: #357ab7;
-}
-.gr-slider {
-    background-color: #333;
-    color: #f5f5f5;
-    border-radius: 8px;
-}
-.gr-chatbox-container {
-    background-color: #1f1f1f;
-    border-radius: 10px;
-}
-.gr-output {
-    font-family: 'JetBrains Mono', monospace;
-    color: #f5f5f5;
-}
-.gr-input {
-    font-family: 'JetBrains Mono', monospace;
-    color: #f5f5f5;
-}
-.gr-chatbox .message {
-    font-family: 'JetBrains Mono', monospace;
-}
-.gr-button.gr-loading {
-    background-color: #f39c12;
-}
-"""
-
 if __name__ == "__main__":
     demo.launch()
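
For context on how the new two-phase respond() behaves, here is a minimal sketch of driving the generator directly, the same way gr.ChatInterface consumes it. This is illustrative only and not part of commit 162ed73; the prompt and parameter values are hypothetical, and it assumes the lambdaindie/lambdai Inference API endpoint is reachable.

# Illustrative sketch (not part of this commit): drain respond() as
# gr.ChatInterface would. Each yield replaces the displayed message --
# first the growing "Thinking..." markdown block, then the streamed
# final answer. Prompt and parameter values below are hypothetical.
last_update = None
for last_update in respond(
    message="Why is the sky blue?",
    history=[],
    system_message="You are a concise, logical AI.",
    max_tokens=256,
    temperature=0.7,
    top_p=0.95,
):
    pass  # intermediate yields are progressive UI updates

print(last_update)  # the fully streamed final answer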