Spaces:

lambdaindie
/

lambdai

Sleeping

File size: 2,402 Bytes

eb450e3
 
7515381
eb450e3
09742af
eb450e3
162ed73
 
 
eb450e3
162ed73
 
 
 
 
eb450e3
162ed73
 
 
 
 
 
 
09742af
162ed73
 
eb450e3
162ed73
 
09742af
eb450e3
 
 
 
162ed73
 
 
 
 
eb450e3
162ed73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
09742af
 
 
162ed73
 
09742af
162ed73
 
 
09742af
162ed73
 
 
 
09742af
eb450e3
 
91e7ac0

import gradio as gr
from huggingface_hub import InferenceClient
import time

# Module-wide Hugging Face inference client, bound to "lambdaindie/lambdai".
client = InferenceClient(model="lambdaindie/lambdai")

def _history_to_messages(history):
    """Convert chat history into a list of ``{"role", "content"}`` dicts.

    Accepts either ``(user, assistant)`` pairs or dict entries that already
    carry ``"role"`` and ``"content"`` keys. Empty turns are dropped.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # Dict entries may carry extra keys; forward only role and content.
            role = turn.get("role")
            content = turn.get("content")
            if role in ("user", "assistant") and content:
                messages.append({"role": role, "content": content})
            continue

        user, assistant = turn
        if user:
            messages.append({"role": "user", "content": user})
        if assistant:
            messages.append({"role": "assistant", "content": assistant})
    return messages


def _stream_text(prompt, max_tokens, temperature, top_p):
    """Stream one chat completion, yielding the text accumulated so far."""
    text = ""
    for chunk in client.chat_completion(
        prompt,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Defensive: skip chunks that carry no choices instead of raising
        # IndexError on ``choices[0]``.
        if not chunk.choices:
            continue
        text += chunk.choices[0].delta.content or ""
        yield text


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Answer ``message`` in two streamed phases: reasoning, then final answer.

    Phase 1 asks the model to think step-by-step and streams that reasoning
    inside a fenced markdown block. Phase 2 feeds the reasoning back as an
    assistant turn and streams the final answer, which replaces the
    reasoning in the output.

    Args:
        message: The new user message.
        history: Previous turns, as ``(user, assistant)`` pairs or as dicts
            with ``"role"`` and ``"content"`` keys.
        system_message: Optional system prompt; omitted when empty.
        max_tokens: Token limit passed to each completion call.
        temperature: Sampling temperature passed to each completion call.
        top_p: Nucleus-sampling value passed to each completion call.

    Yields:
        str: The text to display so far (reasoning block, then answer).
    """
    # Build base message history
    messages = [{"role": "system", "content": system_message}] if system_message else []
    messages.extend(_history_to_messages(history))

    # Phase 1 — Thinking aloud (reasoning step)
    thinking_prompt = messages + [
        {
            "role": "user",
            "content": f"{message}\n\nThink step-by-step before answering."
        }
    ]

    reasoning = ""
    yield "**Thinking...**\n```markdown\n```"  # Trigger gray markdown block

    for partial in _stream_text(thinking_prompt, max_tokens, temperature, top_p):
        reasoning = partial.strip()
        # The closing fence must sit on its own line, otherwise the code
        # block is never terminated and the backticks show up as text.
        yield f"**Thinking...**\n```markdown\n{reasoning}\n```"

    time.sleep(0.5)  # Optional dramatic pause

    # Phase 2 — Final answer
    final_prompt = messages + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": reasoning},
        {"role": "user", "content": "Now answer based on your reasoning above."}
    ]

    for partial in _stream_text(final_prompt, max_tokens, temperature, top_p):
        yield partial.strip()

# Chat UI wiring: `respond` is the handler, and the four extra controls below
# line up, in order, with its system_message, max_tokens, temperature and
# top_p parameters.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(
            label="System Message",
            value="You are a concise, logical AI that explains its reasoning clearly before answering.",
        ),
        gr.Slider(minimum=64, maximum=2048, step=1, value=512, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, step=0.1, value=0.7, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, step=0.05, value=0.95, label="Top-p"),
    ],
    theme=gr.themes.Base(font=["JetBrains Mono", "monospace"], primary_hue="gray"),
    title="LENIRΛ",
)

# Launch the app only when this file is executed directly.
if __name__ == "__main__":
    demo.launch()