Spaces:
Running
Running
File size: 2,810 Bytes
eb450e3 6a03bd2 582395b eb450e3 6a03bd2 eb450e3 b51f88d 5113576 6a03bd2 19d5988 6a03bd2 582395b 6a03bd2 e3c453c 6a03bd2 3cfecb5 6a03bd2 fa8b0f1 582395b 12d054f b86bd3d 19d5988 b86bd3d da0a172 5113576 ddf82d4 5113576 eb450e3 19d5988 ddf82d4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
import html
import time

import gradio as gr
from huggingface_hub import InferenceClient
# Model identifier handed to the inference client below.
_MODEL_ID = "lambdaindie/lambdai"

# Single module-level client reused by every chat request.
client = InferenceClient(_MODEL_ID)
# Stylesheet string for the chat UI. It sets a monospace font on every
# element, a dark page background, and styles the `.markdown-think` box used
# for the streamed reasoning (a pulsing, italic, left-bordered panel).
css = """
@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono&display=swap');
* {
    font-family: 'JetBrains Mono', monospace !important;
}
body {
    background-color: #111;
    color: #e0e0e0;
}
.markdown-think {
    background-color: #1e1e1e;
    border-left: 4px solid #555;
    padding: 10px;
    margin-bottom: 8px;
    font-style: italic;
    white-space: pre-wrap;
    animation: pulse 1.5s infinite ease-in-out;
}
@keyframes pulse {
    0% { opacity: 0.6; }
    50% { opacity: 1.0; }
    100% { opacity: 0.6; }
}
"""
def _history_to_messages(history):
    """Convert chat history into a list of chat-completion message dicts.

    Two history layouts are accepted: ``(user, assistant)`` pairs and
    ``{"role": ..., "content": ...}`` dicts. Entries with empty text are
    skipped. ``None`` or an empty history yields an empty list.
    """
    messages = []
    for entry in history or []:
        if isinstance(entry, dict):
            # Dict-style history: unpacking it like a pair would silently
            # bind the dict's keys instead of the message text.
            role = entry.get("role")
            content = entry.get("content")
            if role and content:
                messages.append({"role": role, "content": content})
            continue
        user, assistant = entry
        if user:
            messages.append({"role": "user", "content": user})
        if assistant:
            messages.append({"role": "assistant", "content": assistant})
    return messages


def _stream_text(prompt, max_tokens, temperature, top_p):
    """Stream a chat completion, yielding the stripped text so far per chunk."""
    text = ""
    for chunk in client.chat_completion(
        prompt,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Guard against chunks that carry no choices; indexing [0] on an
        # empty list would abort the whole stream with IndexError.
        if chunk.choices:
            text += chunk.choices[0].delta.content or ""
        yield text.strip()


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Answer a chat message in two streamed passes: reasoning, then answer.

    The first pass asks the model to think step by step and streams that
    text inside a ``markdown-think`` box. A banner with the elapsed thinking
    time follows, and the second pass streams the final answer, with the
    reasoning fed back to the model as a prior assistant turn.

    Args:
        message: The user's new message.
        history: Earlier turns, as ``(user, assistant)`` pairs or as
            ``{"role", "content"}`` dicts.
        system_message: Optional system prompt; omitted when empty.
        max_tokens: Token limit passed to each completion call.
        temperature: Sampling temperature passed to each completion call.
        top_p: Nucleus-sampling value passed to each completion call.

    Yields:
        str: Successive contents for the reply being displayed (HTML for the
        thinking box and banner, then the plain answer text so far).
    """
    messages = [{"role": "system", "content": system_message}] if system_message else []
    messages.extend(_history_to_messages(history))

    thinking_prompt = messages + [{
        "role": "user",
        "content": f"{message}\n\nThink a bit step-by-step before answering."
    }]

    yield '<div class="markdown-think">Thinking...</div>'

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments.
    start = time.perf_counter()
    reasoning = ""
    for reasoning in _stream_text(thinking_prompt, max_tokens, temperature, top_p):
        # Escape the model text so a stray "<" or "&" cannot break the
        # surrounding markup or inject HTML.
        yield f'<div class="markdown-think">{html.escape(reasoning, quote=False)}</div>'
    elapsed = time.perf_counter() - start

    yield f"""
<div style="margin-top:12px;padding:8px 12px;background-color:#222;border-left:4px solid #888;
font-family:'JetBrains Mono', monospace;color:#ccc;font-size:14px;">
Pensou por {elapsed:.1f} segundos
</div>
"""

    # NOTE(review): presumably a deliberate pause so the banner stays visible
    # before the answer is streamed; it blocks this worker for two seconds.
    time.sleep(2)

    final_prompt = messages + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": reasoning},
        {"role": "user", "content": "Now answer based on your reasoning above."}
    ]

    yield from _stream_text(final_prompt, max_tokens, temperature, top_p)
# Extra controls shown with the chat. They are listed in the same order as
# the trailing parameters of respond(): system_message, max_tokens,
# temperature, top_p.
_extra_inputs = [
    gr.Textbox(value="", label="System Message"),
    gr.Slider(64, 2048, value=512, step=1, label="Max Tokens"),
    gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p"),
]

# Chat application object, wired to respond() and the custom stylesheet.
demo = gr.ChatInterface(
    fn=respond,
    title="λambdAI",
    theme=gr.themes.Base(),
    css=css,
    additional_inputs=_extra_inputs,
)
# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()