import gradio as gr
from huggingface_hub import InferenceClient
import time
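
# Inference client pointed at the lambdaindie/lambdai model on the Hugging Face Hub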
client = InferenceClient("lambdaindie/lambdai")
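
# Custom CSS: JetBrains Mono everywhere, a dark palette, and a pulsing box
# used to display the model's intermediate "thinking" stream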
css = """
@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono&display=swap');
* {
    font-family: 'JetBrains Mono', monospace !important;
}

body {
    background-color: #111;
    color: #e0e0e0;
}

.markdown-think {
    background-color: #1e1e1e;
    border-left: 4px solid #555;
    padding: 10px;
    margin-bottom: 8px;
    font-style: italic;
    white-space: pre-wrap;
    animation: pulse 1.5s infinite ease-in-out;
}

@keyframes pulse {
    0% { opacity: 0.6; }
    50% { opacity: 1.0; }
    100% { opacity: 0.6; }
}
"""
def respond(message, history, system_message, max_tokens, temperature, top_p):
    # Rebuild the conversation for the API: optional system prompt plus the
    # (user, assistant) pairs accumulated in the chat history.
    messages = [{"role": "system", "content": system_message}] if system_message else []
    for user, assistant in history:
        if user:
            messages.append({"role": "user", "content": user})
        if assistant:
            messages.append({"role": "assistant", "content": assistant})

    # Pass 1: ask the model to think step-by-step before answering.
    thinking_prompt = messages + [{
        "role": "user",
        "content": f"{message}\n\nThink step-by-step before answering."
    }]

    reasoning = ""
    yield '<div class="markdown-think">Thinking...</div>'

    for chunk in client.chat_completion(
        thinking_prompt,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content or ""
        reasoning += token
        # Re-render the accumulated reasoning inside the styled "thinking" box.
        styled_thought = f'<div class="markdown-think">{reasoning.strip()}</div>'
        yield styled_thought

    time.sleep(0.5)

    # Pass 2: answer the original question, conditioned on the reasoning above.
    final_prompt = messages + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": reasoning.strip()},
        {"role": "user", "content": "Now answer based on your reasoning above."}
    ]

    final_answer = ""
    for chunk in client.chat_completion(
        final_prompt,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content or ""
        final_answer += token
        yield final_answer.strip()
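
# Wire the handler into a ChatInterface; the extra inputs expose the system
# prompt and sampling parameters in the UI.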
demo = gr.ChatInterface(
    fn=respond,
    title="λambdAI",
    theme=gr.themes.Base(),
    css=css,
    additional_inputs=[
        gr.Textbox(
            value="You are a concise, logical AI that explains its reasoning clearly before answering.",
            label="System Message"
        ),
        gr.Slider(64, 2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p")
    ]
)
if __name__ == "__main__":
demo.launch() |