mariusjabami committed
Commit 6a03bd2 (verified)
Parent: 7a3208e

Update app.py

Files changed (1)
  1. app.py +95 -92
app.py CHANGED
@@ -1,109 +1,112 @@
  import gradio as gr
- from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
- import torch
- import threading
  import time

- model_id = "lambdaindie/lambdai"
- tokenizer = AutoTokenizer.from_pretrained(model_id)
- model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32)
-
- device = "cuda" if torch.cuda.is_available() else "cpu"
- model.to(device)

  css = """
  @import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono&display=swap');
- * { font-family: 'JetBrains Mono', monospace !important; }
- body { background-color: #111; color: #e0e0e0; }

  .markdown-think {
-     background-color: #1e1e1e;
-     border-left: 4px solid #555;
-     padding: 10px;
-     margin-bottom: 8px;
-     font-style: italic;
-     white-space: pre-wrap;
-     animation: pulse 1.5s infinite ease-in-out;
  }

  @keyframes pulse {
-     0% { opacity: 0.6; }
-     50% { opacity: 1.0; }
-     100% { opacity: 0.6; }
  }
  """

  def respond(message, history, system_message, max_tokens, temperature, top_p):
-     messages = [{"role": "system", "content": system_message}] if system_message else []
-
-     for user, assistant in history:
-         if user:
-             messages.append({"role": "user", "content": user})
-         if assistant:
-             messages.append({"role": "assistant", "content": assistant})
-
-     thinking_prompt = messages + [{"role": "user", "content": f"{message}\n\nThink step-by-step."}]
-     prompt = tokenizer.apply_chat_template(thinking_prompt, tokenize=False, add_generation_prompt=True)
-
-     inputs = tokenizer(prompt, return_tensors="pt").to(device)
-     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-
-     reasoning = ""
-     yield '<div class="markdown-think">Thinking...</div>'
-     start = time.time()
-
-     thread = threading.Thread(target=model.generate, kwargs={
-         "inputs": inputs["input_ids"],
-         "max_new_tokens": max_tokens,
-         "temperature": temperature,
-         "top_p": top_p,
-         "streamer": streamer,
-     })
-     thread.start()
-
-     for token in streamer:
-         reasoning += token
-         yield f'<div class="markdown-think">{reasoning.strip()}</div>'
-
-     elapsed = time.time() - start
-     yield f"""<div style="margin-top:12px;padding:8px 12px;background-color:#222;border-left:4px solid #888;
-     font-family:'JetBrains Mono', monospace;color:#ccc;font-size:14px;">
-     Thought for {elapsed:.1f} seconds</div>"""
-
-     # Second stage: final answer
-     final_prompt = messages + [
-         {"role": "user", "content": message},
-         {"role": "assistant", "content": reasoning.strip()},
-         {"role": "user", "content": "Now answer based on that."}
-     ]
-     prompt2 = tokenizer.apply_chat_template(final_prompt, tokenize=False, add_generation_prompt=True)
-     inputs2 = tokenizer(prompt2, return_tensors="pt").to(device)
-     streamer2 = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-
-     thread2 = threading.Thread(target=model.generate, kwargs={
-         "inputs": inputs2["input_ids"],
-         "max_new_tokens": max_tokens,
-         "temperature": temperature,
-         "top_p": top_p,
-         "streamer": streamer2,
-     })
-     thread2.start()
-
-     final_answer = ""
-     for token in streamer2:
-         final_answer += token
-         yield final_answer.strip()

  demo = gr.ChatInterface(
-     fn=respond,
-     title="λambdAI",
-     theme=gr.themes.Base(),
-     css=css,
-     additional_inputs=[
-         gr.Textbox(value="", label="System Message"),
-         gr.Slider(64, 2048, value=512, step=1, label="Max Tokens"),
-         gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p")
-     ]
  )

- if __name__ == "__main__":
-     demo.launch()
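The removed version ran generation locally: `model.generate()` blocks until completion, so it has to run on a worker thread while `TextIteratorStreamer` yields decoded tokens on the calling thread. A minimal sketch of that pattern, with the model id kept from the original (any causal LM works the same way):

```python
import threading
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("lambdaindie/lambdai")
model = AutoModelForCausalLM.from_pretrained("lambdaindie/lambdai")

inputs = tokenizer("Hello", return_tensors="pt")
# skip_prompt=True drops the echoed prompt; the streamer is a plain iterator.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# generate() blocks, so it runs on a background thread while tokens are
# consumed here as they arrive.
threading.Thread(target=model.generate, kwargs={
    "inputs": inputs["input_ids"],
    "max_new_tokens": 32,
    "streamer": streamer,
}).start()

for text in streamer:
    print(text, end="", flush=True)
```

The new version, below, replaces all of this with a serverless client.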
  import gradio as gr
+ from huggingface_hub import InferenceClient
  import time

+ client = InferenceClient("lambdaindie/lambdai")

  css = """
  @import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono&display=swap');
+
+ * {
+     font-family: 'JetBrains Mono', monospace !important;
+ }
+
+ body {
+     background-color: #111;
+     color: #e0e0e0;
+ }
+
  .markdown-think {
+     background-color: #1e1e1e;
+     border-left: 4px solid #555;
+     padding: 10px;
+     margin-bottom: 8px;
+     font-style: italic;
+     white-space: pre-wrap;
+     animation: pulse 1.5s infinite ease-in-out;
  }
+
  @keyframes pulse {
+     0% { opacity: 0.6; }
+     50% { opacity: 1.0; }
+     100% { opacity: 0.6; }
  }
  """

  def respond(message, history, system_message, max_tokens, temperature, top_p):
+     messages = [{"role": "system", "content": system_message}] if system_message else []
+
+     for user, assistant in history:
+         if user:
+             messages.append({"role": "user", "content": user})
+         if assistant:
+             messages.append({"role": "assistant", "content": assistant})
+
+     thinking_prompt = messages + [{
+         "role": "user",
+         "content": f"{message}\n\nThink a bit step-by-step before answering."
+     }]
+
+     reasoning = ""
+     yield '<div class="markdown-think">Thinking...</div>'
+
+     start = time.time()
+
+     for chunk in client.chat_completion(
+         thinking_prompt,
+         max_tokens=max_tokens,
+         stream=True,
+         temperature=temperature,
+         top_p=top_p,
+     ):
+         token = chunk.choices[0].delta.content or ""
+         reasoning += token
+         styled_thought = f'<div class="markdown-think">{reasoning.strip()}</div>'
+         yield styled_thought
+
+     elapsed = time.time() - start
+
+     yield f"""
+     <div style="margin-top:12px;padding:8px 12px;background-color:#222;border-left:4px solid #888;
+     font-family:'JetBrains Mono', monospace;color:#ccc;font-size:14px;">
+     Thought for {elapsed:.1f} seconds
+     </div>
+     """
+
+     time.sleep(2)
+
+     final_prompt = messages + [
+         {"role": "user", "content": message},
+         {"role": "assistant", "content": reasoning.strip()},
+         {"role": "user", "content": "Now answer based on your reasoning above."}
+     ]
+
+     final_answer = ""
+     for chunk in client.chat_completion(
+         final_prompt,
+         max_tokens=max_tokens,
+         stream=True,
+         temperature=temperature,
+         top_p=top_p,
+     ):
+         token = chunk.choices[0].delta.content or ""
+         final_answer += token
+         yield final_answer.strip()

  demo = gr.ChatInterface(
+     fn=respond,
+     title="λambdAI",
+     theme=gr.themes.Base(),
+     css=css,
+     additional_inputs=[
+         gr.Textbox(value="", label="System Message"),
+         gr.Slider(64, 2048, value=512, step=1, label="Max Tokens"),
+         gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
+         gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p")
+     ]
  )

+ if __name__ == "__main__":
+     demo.launch()
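The rewrite delegates generation to the hosted Inference API: `InferenceClient.chat_completion(..., stream=True)` yields OpenAI-style chunks, so no torch, no threads, and no local weights are needed. A minimal sketch of the call the new app.py is built around, assuming "lambdaindie/lambdai" is reachable through the Inference API:

```python
from huggingface_hub import InferenceClient

client = InferenceClient("lambdaindie/lambdai")
messages = [{"role": "user", "content": "Hello!"}]

for chunk in client.chat_completion(messages, max_tokens=64, stream=True):
    # Each chunk carries an OpenAI-style delta; content can be None on the
    # final chunk, hence the `or ""` guard mirrored in app.py.
    print(chunk.choices[0].delta.content or "", end="", flush=True)
```

With generation offloaded, the Space's requirements.txt presumably shrinks to just gradio and huggingface_hub. Note also that `gr.ChatInterface` passes the `additional_inputs` values (system message, max tokens, temperature, top-p) positionally after `message` and `history`, which is why `respond` takes them in that order.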