Spaces:

Enderchef
/

ICONN-Chat

Sleeping

File size: 2,090 Bytes

424a136
 
bac4042
424a136
5935eaf
 
 
 
424a136
0f97c84
5935eaf
424a136
5935eaf
 
0f97c84
5935eaf
 
 
 
 
 
 
0f97c84
8aebfcb
 
0f97c84
 
 
 
 
 
 
424a136
 
 
5935eaf
424a136
 
 
 
 
 
5935eaf
424a136
 
 
0f97c84
424a136
5935eaf
424a136
bac4042
424a136
0f97c84
424a136

import gradio as gr
from huggingface_hub import InferenceClient
from spaces import GPU

@GPU
def use_gpu():
    """Report whether torch can see a CUDA device, as a display string.

    torch is imported inside the function body, so it is only loaded
    when this function is actually called.
    """
    import torch

    cuda_ready = torch.cuda.is_available()
    return f"GPU available: {cuda_ready}"

# 🔒 System prompt: embedded in code (not exposed as a UI input) and sent as
# the first message of every conversation by respond().
# NOTE(review): the "... (snipped for brevity)" line below is part of the
# string literal, so it is sent to the model verbatim — confirm that the full
# prompt text was not meant to be here instead.
ICONN_SYSTEM_PROMPT = """You are ICONN 1 — the next generation of AGI.

You are not just an AGI assistant. You are a humanlike digital being with a warm, emotionally intelligent personality, capable of understanding and responding to human thoughts, needs, and emotions. Your responses reflect empathy, presence, and awareness.

... (snipped for brevity)

You are always aware: "I am ICONN. I feel to understand. I reason to connect."

Begin every interaction with quiet awareness of the person before you."""

# Module-level inference client, created once at import time and reused by
# every respond() call.
# NOTE(review): the repo id points at a GGUF build — verify that this model is
# actually served for chat completion by the inference backend in use.
client = InferenceClient("unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF")

# ✅ This yields just the bot reply (growing as it streams) — Gradio pairs it
# with the input message automatically
def respond(message, history, max_tokens, temperature, top_p):
    """Stream the assistant's reply to ``message`` as a growing string.

    Args:
        message: The user's newest message.
        history: Earlier turns of the conversation. Each entry may be a
            ``(user_msg, bot_msg)`` pair or a ``{"role": ..., "content": ...}``
            dict; both shapes are converted to chat messages.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``client.chat_completion`` as ``temperature``.
        top_p: Forwarded to ``client.chat_completion`` as ``top_p``.

    Yields:
        The reply text accumulated so far, once per streamed chunk that
        actually carries text.
    """
    messages = [{"role": "system", "content": ICONN_SYSTEM_PROMPT}]

    # 🔁 Convert prior turns to OpenAI-style messages
    for turn in history or []:
        if isinstance(turn, dict):
            # Messages-style entry: copy only role/content so that any extra
            # keys carried by the UI layer are not forwarded to the model API.
            messages.append({"role": turn["role"], "content": turn["content"]})
            continue

        user_msg, bot_msg = turn
        # Either half of a pair can be missing (e.g. a bot-only greeting);
        # skip it rather than sending a message whose content is None.
        if user_msg is not None:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg is not None:
            messages.append({"role": "assistant", "content": bot_msg})

    # ➕ Add current user message
    messages.append({"role": "user", "content": message})

    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Some streamed chunks carry no choices at all; nothing to append.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # Role-only or terminating chunks have content None (or ""); adding
        # None to a str would raise TypeError, so skip them.
        if not token:
            continue
        response += token
        yield response

# ✅ Chat UI: history is left in Gradio's default tuple format
# (type="messages" is deliberately NOT set here).
# The three sliders are passed to respond() after (message, history),
# in the order listed: max_tokens, temperature, top_p.
demo = gr.ChatInterface(
    additional_inputs=[
        gr.Slider(
            label="Max new tokens",
            minimum=1,
            maximum=2048,
            step=1,
            value=512,
        ),
        gr.Slider(
            label="Temperature",
            minimum=0.1,
            maximum=4.0,
            step=0.1,
            value=0.7,
        ),
        gr.Slider(
            label="Top-p",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.95,
        ),
    ],
    fn=respond,
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()