import os

import gradio as gr
from openai import OpenAI

# API endpoints
API_HOSTS = {
    "Domestic": "https://api.chatanywhere.tech/v1",
    "Overseas": "https://api.chatanywhere.org/v1",
}

# Model info (prices are per 1K tokens, as listed by the service)
MODELS_INFO = {
    "gpt-3.5-turbo": {"input_price": "0.0035", "output_price": "0.0105", "features": "Fast, affordable"},
    "gpt-4o": {"input_price": "0.0175", "output_price": "0.07", "features": "Cheaper & faster GPT-4o"},
    "gpt-4-turbo": {"input_price": "0.07", "output_price": "0.21", "features": "Multimodal, tool use"},
    "gpt-4o-ca": {"input_price": "0.01", "output_price": "0.04", "features": "CA variant, daily free limit"},
}


def create_client(host):
    """Create an OpenAI API client pointed at the given host."""
    key = os.getenv("OPENAI_API_KEY")
    if not key:
        raise ValueError("Missing environment variable: OPENAI_API_KEY")
    return OpenAI(api_key=key, base_url=host)


def get_model_card(model_name):
    """Return markdown info for the selected model."""
    info = MODELS_INFO.get(model_name, {})
    if not info:
        return "Model info not available."
    return (
        f"**{model_name}**\n\n"
        f"Input price (/1K tokens): {info['input_price']}\n\n"
        f"Output price (/1K tokens): {info['output_price']}\n\n"
        f"Features: {info['features']}"
    )


def respond(user, history, host_choice, model_name, temperature, top_p, max_tokens, sys_prompt):
    """Main chat handler with streaming and error handling."""
    history = history or []
    if not user.strip():
        yield history + [("", "⚠️ Please enter a message.")]
        return

    try:
        client = create_client(API_HOSTS[host_choice])
    except Exception as e:
        yield history + [("", f"❌ {e}")]
        return

    # Rebuild the full conversation from the Gradio (user, assistant) tuple history
    messages = [{"role": "system", "content": sys_prompt or "You are a helpful assistant."}]
    for u, a in history:
        messages.append({"role": "user", "content": u})
        messages.append({"role": "assistant", "content": a})
    messages.append({"role": "user", "content": user})

    partial = ""
    appended = False  # tracks whether the in-progress turn is already in history
    try:
        stream = client.chat.completions.create(
            model=model_name,
            messages=messages,
            temperature=temperature,
            top_p=top_p,
            max_tokens=int(max_tokens),  # sliders return floats; the API expects an int
            stream=True,
        )
        history.append((user, partial))
        appended = True
        yield history  # initial blank assistant message

        for chunk in stream:
            try:
                if not chunk.choices:
                    continue
                choice = chunk.choices[0]
                if not hasattr(choice, "delta") or not choice.delta:
                    continue
                delta = getattr(choice.delta, "content", "") or ""
                if delta:
                    partial += delta
                    history[-1] = (user, partial)
                    yield history
            except Exception:
                continue  # skip malformed chunks

        if not partial.strip():
            history[-1] = (user, "⚠️ No response received from the model.")
            yield history

    except Exception as e:
        err = str(e)
        if "429" in err:
            out = (
                "🚫 Daily quota reached for this model.\n"
                "Please try again after 00:00 China time or switch model/host."
            )
        elif "401" in err or "invalid_api_key" in err.lower():
            out = "❌ Invalid or missing API key. Check your OPENAI_API_KEY."
        elif "timed out" in err.lower():
            out = "⌛ Request timed out. Please try again."
        else:
            out = f"❌ API Error: {err}"
        # Replace the half-finished turn if it is already shown; otherwise append,
        # so a mid-stream failure does not leave a duplicate user bubble.
        if appended:
            history[-1] = (user, out)
        else:
            history.append((user, out))
        yield history


with gr.Blocks(title="ChatAnywhere Realtime Chatbot", theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 💬 ChatAnywhere Realtime Chatbot\nPowered by GPT models via the ChatAnywhere API")

    with gr.Row():
        with gr.Column(scale=3):
            chat = gr.Chatbot(label="Conversation", height=500, show_copy_button=True, render_markdown=True)
            with gr.Row():
                msg = gr.Textbox(placeholder="Type your message...", lines=2, scale=4)
                send = gr.Button("Send", scale=1)
                clear = gr.Button("Clear", scale=1)
        with gr.Column(scale=1):
            host = gr.Radio(list(API_HOSTS.keys()), value="Domestic", label="API Host")
            model = gr.Dropdown(list(MODELS_INFO.keys()), value="gpt-3.5-turbo", label="Model")
            model_card = gr.Markdown(get_model_card("gpt-3.5-turbo"))
            temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature")
            top_p = gr.Slider(0.05, 1.0, value=1.0, step=0.05, label="Top-p")
            max_tokens = gr.Slider(64, 4096, value=512, step=64, label="Max Tokens")
            sys_prompt = gr.Textbox(label="System Prompt (optional)", lines=2)

    model.change(get_model_card, model, model_card)

    inputs = [msg, chat, host, model, temperature, top_p, max_tokens, sys_prompt]
    send.click(respond, inputs, chat)
    msg.submit(respond, inputs, chat)
    # Clear the input box; component values are snapshotted when the event fires,
    # so respond() still receives the original message.
    send.click(lambda _: "", msg, msg)
    msg.submit(lambda _: "", msg, msg)
    clear.click(lambda: [], None, chat)

if __name__ == "__main__":
    demo.launch()
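# Quick-start sketch (assumptions: this file is saved as app.py, and the key is a
# ChatAnywhere key usable with the OpenAI Python SDK v1 client created above):
#
#   export OPENAI_API_KEY=sk-...   # placeholder; substitute your own key
#   python app.py
#
# Gradio then serves the UI at http://127.0.0.1:7860 by default. On Gradio
# versions before 4.x the queue is not enabled automatically, so call
# demo.queue() before demo.launch() there; otherwise the streaming generator
# in respond() will not deliver partial updates to the chat window.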