Update app.py
app.py
CHANGED
```diff
@@ -1,126 +1,191 @@
-import gradio as gr
-from openai import OpenAI
 import os
 import time
+import json
+import traceback
+import gradio as gr
+from openai import OpenAI
 
 HOSTS = {
     "Domestic (Lower Latency)": "https://api.chatanywhere.tech/v1",
-    "Overseas": "https://api.chatanywhere.org/v1"
+    "Overseas (Fallback)": "https://api.chatanywhere.org/v1"
+}
+
+MODELS = {
+    "o3": {"input": "0.014 / 1K Tokens", "output": "0.056 / 1K Tokens", "support": "support", "features": "Sets new standards for math, science, coding, visual reasoning tasks, and technical writing. Points to o3-2025-04-16."},
+    "o3-2025-04-16": {"input": "0.014 / 1K Tokens", "output": "0.056 / 1K Tokens", "support": "support", "features": "Sets new standards for math, science, coding, visual reasoning tasks, and technical writing."},
+    "o4-mini": {"input": "0.0088 / 1K Tokens", "output": "0.0352 / 1K Tokens", "support": "support", "features": "Sets new standards for math, science, coding, visual reasoning tasks, and technical writing."},
+    "o4-mini-2025-04-16": {"input": "0.0088 / 1K Tokens", "output": "0.0352 / 1K Tokens", "support": "support", "features": "Sets new standards for math, science, coding, visual reasoning tasks, and technical writing."},
+    "gpt-4.1": {"input": "0.014 / 1K Tokens", "output": "0.056 / 1K Tokens", "support": "support", "features": "Improvements in encoding, instruction tracking, and long context, with 1M input and 32k output."},
+    "gpt-4.1-2025-04-14": {"input": "0.014 / 1K Tokens", "output": "0.056 / 1K Tokens", "support": "support", "features": "Improvements in encoding, instruction tracking, and long context, with 1M input and 32k output."},
+    "gpt-4.1-mini": {"input": "0.0028 / 1K Tokens", "output": "0.0112 / 1K Tokens", "support": "support", "features": "Improvements in encoding, instruction tracking, and long context."},
+    "gpt-4.1-mini-2025-04-14": {"input": "0.0028 / 1K Tokens", "output": "0.0112 / 1K Tokens", "support": "support", "features": "Improvements in encoding, instruction tracking, and long context, with 1M input and 32k output."},
+    "gpt-4.1-nano": {"input": "0.0007 / 1K Tokens", "output": "0.0028 / 1K Tokens", "support": "support", "features": "Improvements in encoding, instruction tracking, and long context."},
+    "gpt-4.1-nano-2025-04-14": {"input": "0.0007 / 1K Tokens", "output": "0.0028 / 1K Tokens", "support": "support", "features": "Improvements in encoding, instruction tracking, and long context, with 1M input and 32k output."},
+    "gpt-oss-20b": {"input": "0.0008 / 1K Tokens", "output": "0.0032 / 1K Tokens", "support": "support", "features": "Open source model."},
+    "gpt-oss-120b": {"input": "0.0044 / 1K Tokens", "output": "0.0176 / 1K Tokens", "support": "support", "features": "Open source model."},
+    "gpt-3.5-turbo": {"input": "0.0035 / 1K Tokens", "output": "0.0105 / 1K Tokens", "support": "support", "features": "Default model, equal to gpt-3.5-turbo-0125."},
+    "gpt-3.5-turbo-1106": {"input": "0.007 / 1K Tokens", "output": "0.014 / 1K Tokens", "support": "support", "features": "Model updated on November 6, 2023."},
+    "gpt-3.5-turbo-0125": {"input": "0.0035 / 1K Tokens", "output": "0.0105 / 1K Tokens", "support": "support", "features": "Model from January 25, 2024."},
+    "gpt-3.5-turbo-16k": {"input": "0.021 / 1K Tokens", "output": "0.028 / 1K Tokens", "support": "support", "features": "Longer context (16k)."},
+    "gpt-3.5-turbo-instruct": {"input": "0.0105 / 1K Tokens", "output": "0.014 / 1K Tokens", "support": "support", "features": "Completions-style instruct model."},
+    "o1-mini": {"input": "0.0088 / 1K Tokens", "output": "0.0352 / 1K Tokens", "support": "support", "features": "Reasoning model for complex tasks."},
+    "o1-preview": {"input": "0.105 / 1K Tokens", "output": "0.42 / 1K Tokens", "support": "support", "features": "Preview reasoning model."},
+    "o3-mini [5]": {"input": "0.0088 / 1K Tokens", "output": "0.0352 / 1K Tokens", "support": "support", "features": "Reasoning model."},
+    "o1 [5]": {"input": "0.12 / 1K Tokens", "output": "0.48 / 1K Tokens", "support": "support", "features": "Powerful reasoning model."},
+    "gpt-4o-search-preview": {"input": "0.0175 / 1K Tokens", "output": "0.07 / 1K Tokens", "support": "support", "features": "Search-enabled model (+search fee)."},
+    "gpt-4o-search-preview-2025-03-11": {"input": "0.0175 / 1K Tokens", "output": "0.07 / 1K Tokens", "support": "support", "features": "Search-enabled model."},
+    "gpt-4o-mini-search-preview": {"input": "0.00105 / 1K Tokens", "output": "0.0042 / 1K Tokens", "support": "support", "features": "Search-enabled mini model."},
+    "gpt-4o-mini-search-preview-2025-03-11": {"input": "0.00105 / 1K Tokens", "output": "0.0042 / 1K Tokens", "support": "support", "features": "Search-enabled mini model."},
+    "gpt-4": {"input": "0.21 / 1K Tokens", "output": "0.42 / 1K Tokens", "support": "support", "features": "Default GPT-4 family model."},
+    "gpt-4o": {"input": "0.0175 / 1K Tokens + image fee", "output": "0.07 / 1K Tokens", "support": "support", "features": "Cheaper/faster GPT-4o variant (+image fee)."},
+    "gpt-4o-2024-05-13": {"input": "0.035 / 1K Tokens + image fee", "output": "0.105 / 1K Tokens", "support": "support", "features": "GPT-4o release from 2024-05-13."},
+    "gpt-4o-2024-08-06": {"input": "0.0175 / 1K Tokens + image fee", "output": "0.07 / 1K Tokens", "support": "support", "features": "Supports 128k input and 16k output."},
+    "gpt-4o-2024-11-20": {"input": "0.0175 / 1K Tokens + image fee", "output": "0.07 / 1K Tokens", "support": "support", "features": "Improved creative writing."},
+    "chatgpt-4o-latest": {"input": "0.035 / 1K Tokens + image fee", "output": "0.105 / 1K Tokens", "support": "support", "features": "Dynamically updated version."},
+    "gpt-4o-mini": {"input": "0.00105 / 1K Tokens + image fee", "output": "0.0042 / 1K Tokens", "support": "support", "features": "Mini GPT-4o with image input."},
+    "gpt-4-0613": {"input": "0.21 / 1K Tokens", "output": "0.42 / 1K Tokens", "support": "support", "features": "Updated June 13, 2023."},
+    "gpt-4-turbo-preview": {"input": "0.07 / 1K Tokens", "output": "0.21 / 1K Tokens", "support": "support", "features": "Preview turbo variant (128K input)."},
+    "gpt-4-0125-preview": {"input": "0.07 / 1K Tokens", "output": "0.21 / 1K Tokens", "support": "support", "features": "Preview updated Jan 25, 2024."},
+    "gpt-4-1106-preview": {"input": "0.07 / 1K Tokens", "output": "0.21 / 1K Tokens", "support": "support", "features": "Preview updated Nov 6, 2023."},
+    "gpt-4-vision-preview": {"input": "0.07 / 1K Tokens + image fee", "output": "0.21 / 1K Tokens", "support": "support", "features": "Multimodal with image recognition."},
+    "gpt-4-turbo": {"input": "0.07 / 1K Tokens + image fee", "output": "0.21 / 1K Tokens", "support": "support", "features": "Multimodal, function tools."},
+    "gpt-4-turbo-2024-04-09": {"input": "0.07 / 1K Tokens + image fee", "output": "0.21 / 1K Tokens", "support": "support", "features": "Preview turbo model."},
+    "gpt-4.1-ca": {"input": "0.008 / 1K Tokens", "output": "0.032 / 1K Tokens", "support": "support", "features": "Third-party provider CA variant."},
+    "gpt-4.1-mini-ca": {"input": "0.0016 / 1K Tokens", "output": "0.0064 / 1K Tokens", "support": "support", "features": "CA mini variant."},
+    "gpt-4.1-nano-ca": {"input": "0.0004 / 1K Tokens", "output": "0.003 / 1K Tokens", "support": "support", "features": "CA nano variant."},
+    "gpt-3.5-turbo-ca": {"input": "0.001 / 1K Tokens", "output": "0.0016 / 1K Tokens", "support": "support", "features": "CA region variant."},
+    "gpt-4-ca": {"input": "0.12 / 1K Tokens", "output": "0.24 / 1K Tokens", "support": "support", "features": "CA region variant."},
+    "gpt-4-turbo-ca": {"input": "0.04 / 1K Tokens + image fee", "output": "0.12 / 1K Tokens", "support": "support", "features": "CA region turbo."},
+    "gpt-4o-ca": {"input": "0.01 / 1K Tokens + image fee", "output": "0.04 / 1K Tokens", "support": "support", "features": "CA region GPT-4o."},
+    "gpt-4o-mini-ca": {"input": "0.00075 / 1K Tokens", "output": "0.003 / 1K Tokens", "support": "support", "features": "CA mini."},
+    "o1-mini-ca": {"input": "0.012 / 1K Tokens", "output": "0.048 / 1K Tokens", "support": "support", "features": "CA reasoning mini."},
+    "o1-preview-ca": {"input": "0.06 / 1K Tokens", "output": "0.24 / 1K Tokens", "support": "support", "features": "CA preview reasoning."}
+
 }
```
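The `MODELS` catalog above is maintained by hand, so entries can drift out of shape over time. A quick check like the following (not part of the commit; it assumes only the dict above) confirms that every entry carries the four keys the UI code reads:

```python
# Hypothetical sanity check for the hand-maintained MODELS catalog;
# flags any entry missing one of the keys get_model_card() reads.
REQUIRED_KEYS = {"input", "output", "support", "features"}

def check_catalog(models: dict) -> list:
    problems = []
    for name, entry in models.items():
        missing = REQUIRED_KEYS - entry.keys()
        if missing:
            problems.append(f"{name}: missing {sorted(missing)}")
    return problems

assert not check_catalog(MODELS), check_catalog(MODELS)
```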
```diff
 def create_client(base_url):
     api_key = os.getenv("OPENAI_API_KEY")
     if not api_key:
-        raise ValueError("Missing…
+        raise ValueError("Missing OPENAI_API_KEY environment variable")
     return OpenAI(api_key=api_key, base_url=base_url)
 
-def …
-…
+def get_model_card(model_name):
+    m = MODELS.get(model_name)
+    if not m:
+        return "Model not found in catalog"
+    return f"**{model_name}**\n\nInput price: {m['input']}\n\nOutput price: {m['output']}\n\nSupport: {m['support']}\n\n{m['features']}"
 
-…
+def export_history(history):
+    try:
+        fname = f"chat_history_{int(time.time())}.json"
+        with open(fname, "w", encoding="utf-8") as f:
+            json.dump([{"user": u, "assistant": a} for u, a in (history or [])], f, ensure_ascii=False, indent=2)
+        return f"Saved to {fname}"
+    except Exception as e:
+        return f"Export failed: {e}"
```
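Both helpers are plain functions, so they can be sanity-checked outside the UI. A minimal run, assuming the definitions above (`export_history` writes a JSON file named from the current timestamp into the working directory):

```python
# Exercise the two helpers directly; output text is illustrative.
print(get_model_card("gpt-4o"))            # markdown card with prices and features
print(get_model_card("no-such-model"))     # -> "Model not found in catalog"
print(export_history([("hi", "hello!")]))  # -> "Saved to chat_history_<timestamp>.json"
```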
```diff
-…
+def respond_stream(user_message, history, host_choice, model_name, temperature, top_p, max_tokens, system_prompt):
+    history = history or []
+    if not user_message or not user_message.strip():
+        yield history + [("", "⚠️ Please enter a message to send.")]
+        return
+    messages = []
+    sys_prompt = system_prompt.strip() if system_prompt and system_prompt.strip() else "You are a helpful, concise assistant."
+    messages.append({"role": "system", "content": sys_prompt})
     for human, ai in history:
         messages.append({"role": "user", "content": human})
         messages.append({"role": "assistant", "content": ai})
     messages.append({"role": "user", "content": user_message})
-…
-    try:
-        client = create_client(HOSTS[host_choice])
-    except ValueError as e:
-        yield history + [(user_message, f"❌ {e}")]
-        return
-    except Exception as e:
-        yield history + [(user_message, f"❌ Failed to initialize client: {e}")]
-        return
-
-    # API request with streaming
-    try:
-        with client.chat.completions.stream(
-            model="gpt-5",
-            messages=messages,
-            temperature=temperature,
-            timeout=30  # network timeout
-        ) as stream:
-            partial = ""
-            last_update_time = time.time()
-
-            for event in stream:
-                if event.type == "message.delta" and event.delta.content:
-                    partial += event.delta.content
-
-                    # Update UI at most 5 times per second
-                    if time.time() - last_update_time > 0.2:
-                        yield history + [(user_message, partial)]
-                        last_update_time = time.time()
-
-            history.append((user_message, partial))
-            yield history
-
-    except Exception as e:
-        # Retry with alternate host
+    last_error = None
+    hosts_to_try = [HOSTS.get(host_choice)] if HOSTS.get(host_choice) else list(HOSTS.values())
+    for base in hosts_to_try:
         try:
-            …
-                temperature=temperature,
-                timeout=30
-            ) as stream:
+            client = create_client(base)
+        except Exception as e:
+            last_error = e
+            continue
+        try:
+            try:
+                stream = client.chat.completions.stream(model=model_name, messages=messages, temperature=float(temperature), top_p=float(top_p), max_tokens=int(max_tokens), timeout=30)
                 partial = ""
-                …
+                last_update = time.time()
+                with stream as s:
+                    for event in s:
+                        try:
+                            if getattr(event, "type", None) == "message.delta":
+                                delta = getattr(event, "delta", None)
+                                content = getattr(delta, "content", None) if delta is not None else None
+                                if content:
+                                    partial += content
+                                    if time.time() - last_update > 0.15:
+                                        yield history + [(user_message, partial)]
+                                        last_update = time.time()
+                            elif getattr(event, "type", None) == "message":
+                                content = getattr(event, "message", None)
+                                content_text = None
+                                if content and hasattr(content, "content"):
+                                    content_text = getattr(content.content, "get", lambda k, d=None: None)("text", None)
+                                if content_text:
+                                    partial += content_text
+                                    yield history + [(user_message, partial)]
+                        except Exception:
+                            pass
+                if not partial.strip():
+                    partial = "No output received from model. Possible reasons: invalid model, API error, usage limits, or network timeout."
                 history.append((user_message, partial))
                 yield history
-…
+                return
+            except AttributeError:
+                resp = client.chat.completions.create(model=model_name, messages=messages, temperature=float(temperature), top_p=float(top_p), max_tokens=int(max_tokens))
+                bot_reply = ""
+                try:
+                    bot_reply = resp.choices[0].message.content
+                except Exception:
+                    try:
+                        bot_reply = resp["choices"][0]["message"]["content"]
+                    except Exception:
+                        bot_reply = str(resp)
+                if not bot_reply or not str(bot_reply).strip():
+                    bot_reply = "No output received from model. Possible reasons: invalid model, API error, usage limits, or network timeout."
+                history.append((user_message, bot_reply))
+                yield history
+                return
+        except Exception as e:
+            last_error = e
+            continue
+    err_text = f"❌ All hosts failed. Last error: {last_error}\nCheck OPENAI_API_KEY, selected model, and network connectivity."
+    history.append((user_message, err_text))
+    yield history
```
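Because `respond_stream()` is a generator that yields progressively longer histories, the host-failover and streaming logic can be smoke-tested without launching Gradio. A sketch, assuming `OPENAI_API_KEY` is set and using the host and model names defined in this file:

```python
# Drive the generator directly; each yielded value is the full chat history,
# so the last tuple's second element is the (possibly partial) reply.
for history in respond_stream(
    "Say hello in five words.",   # user_message
    [],                           # history
    "Domestic (Lower Latency)",   # host_choice
    "gpt-3.5-turbo",              # model_name
    0.7,                          # temperature
    1.0,                          # top_p
    256,                          # max_tokens
    "",                           # system_prompt (empty -> built-in default)
):
    print(history[-1][1])
```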
```diff
+model_choices = sorted(MODELS.keys())
+
+with gr.Blocks(title="Polished GPT App", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("<h2 style='text-align:center'>Polished GPT UI — Model Catalog Integrated</h2>")
     with gr.Row():
         with gr.Column(scale=3):
-            chatbot = gr.Chatbot(
-                label="Conversation",
-                bubble_full_width=False,
-                height=500,
-                show_copy_button=True,
-                render_markdown=True
-            )
-            msg = gr.Textbox(placeholder="Type your message...", lines=1)
+            chatbot = gr.Chatbot(label="Conversation", height=520, show_copy_button=True, render_markdown=True)
             with gr.Row():
-…
+                txt = gr.Textbox(placeholder="Type a message and press Enter", lines=2)
+                send = gr.Button("Send", variant="primary")
+            with gr.Row():
+                stop_btn = gr.Button("Stop", variant="secondary")
+                clear_btn = gr.Button("Clear", variant="secondary")
+                export_btn = gr.Button("Export")
+            status = gr.Markdown("")
         with gr.Column(scale=1):
-…
-            )
-…
-    send_btn.click(respond_stream, [msg, chatbot, host_choice, temperature], chatbot, queue=True)
-    msg.submit(respond_stream, [msg, chatbot, host_choice, temperature], chatbot, queue=True)
-    clear_btn.click(lambda: None, None, chatbot)
+            host = gr.Radio(list(HOSTS.keys()), value="Domestic (Lower Latency)", label="API Host")
+            model_dropdown = gr.Dropdown(model_choices, value="gpt-3.5-turbo", label="Model")
+            model_card = gr.Markdown(get_model_card("gpt-3.5-turbo"))
+            temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature")
+            top_p = gr.Slider(0.05, 1.0, value=1.0, step=0.05, label="Top-p")
+            max_tokens = gr.Slider(64, 8192, value=512, step=64, label="Max Tokens")
+            system_prompt = gr.Textbox(label="System Prompt (optional)", lines=3, placeholder="You are a helpful assistant.")
+    send.click(respond_stream, [txt, chatbot, host, model_dropdown, temperature, top_p, max_tokens, system_prompt], chatbot, queue=True)
+    txt.submit(respond_stream, [txt, chatbot, host, model_dropdown, temperature, top_p, max_tokens, system_prompt], chatbot, queue=True)
+    model_dropdown.change(lambda m: get_model_card(m), model_dropdown, model_card)
+    clear_btn.click(lambda: [], None, chatbot)
+    export_btn.click(lambda h: export_history(h), chatbot, status)
+    stop_btn.click(lambda: "stop", None, None)
 
 if __name__ == "__main__":
-    demo.queue().launch()
+    demo.queue().launch(server_name="0.0.0.0", server_port=7860, show_api=False)
```
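One caveat on the transport: `client.chat.completions.stream(...)` is not present in every openai-python release (v1 SDKs have exposed the streaming helper as `client.beta.chat.completions.stream(...)`, depending on version), which is what the `except AttributeError` branch guards against. That fallback, however, gives up streaming and makes one blocking `create()` call. The portable way to keep token-by-token output with the v1 SDK is `create(stream=True)`; a minimal sketch of that path, reusing this app's base URL:

```python
# Plain Chat Completions streaming with the openai v1 SDK; assumes
# OPENAI_API_KEY is set in the environment.
from openai import OpenAI

client = OpenAI(base_url="https://api.chatanywhere.tech/v1")
stream = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Say hello"}],
    stream=True,
)
for chunk in stream:
    if not chunk.choices:        # some providers send choice-less usage chunks
        continue
    delta = chunk.choices[0].delta.content  # None for role/finish chunks
    if delta:
        print(delta, end="", flush=True)
```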