Update app.py

app.py CHANGED
@@ -1,24 +1,51 @@
-import
-from
+import streamlit as st
+from functools import lru_cache
+import requests
+
+# Cache model loading to optimize performance
+@lru_cache(maxsize=3)
+def load_hf_model(model_name):
+    # Use the Hugging Face Inference API directly
+    api_url = f"https://api-inference.huggingface.co/models/deepseek-ai/{model_name}"
+    return api_url
+
+# Load all models at startup
+MODELS = {
+    "DeepSeek-R1-Distill-Qwen-32B": load_hf_model("DeepSeek-R1-Distill-Qwen-32B"),
+    "DeepSeek-R1": load_hf_model("DeepSeek-R1"),
+    "DeepSeek-R1-Zero": load_hf_model("DeepSeek-R1-Zero")
+}

# --- Chatbot function ---
def chatbot(input_text, history, model_choice, system_message, max_new_tokens, temperature, top_p):
    history = history or []

+    # Get the selected model API URL
+    api_url = MODELS[model_choice]
+
    # Create payload for the model
    payload = {
-        "
-
-
-
-
+        "inputs": {
+            "messages": [{"role": "user", "content": input_text}],
+            "system": system_message,
+            "max_tokens": max_new_tokens,
+            "temperature": temperature,
+            "top_p": top_p
+        }
    }

    # Run inference using the selected model
    try:
-
-
-
+        headers = {"Authorization": f"Bearer {st.secrets['HUGGINGFACE_TOKEN']}"}
+        response = requests.post(api_url, headers=headers, json=payload).json()
+
+        # Handle the response format
+        if isinstance(response, list) and len(response) > 0:
+            # Assuming the response is a list of generated text
+            assistant_response = response[0].get("generated_text", "No response generated.")
+        elif isinstance(response, dict) and "generated_text" in response:
+            # If the response is a dictionary with generated_text
+            assistant_response = response["generated_text"]
        else:
            assistant_response = "Unexpected model response format."
    except Exception as e:
@@ -27,63 +54,78 @@ def chatbot(input_text, history, model_choice, system_message, max_new_tokens, t
    # Append user and assistant messages to history
    history.append((input_text, assistant_response))

-    return history
+    return history

-# ---
-
-
-
-
-
-
-
-
+# --- Streamlit App ---
+st.set_page_config(page_title="DeepSeek Chatbot", page_icon="🤖", layout="wide")
+
+# Title and description
+st.title("DeepSeek Chatbot")
+st.markdown("""
+Created by [ruslanmv.com](https://ruslanmv.com/)
+This is a demo of different DeepSeek models. Select a model, type your message, and click "Submit".
+You can also adjust optional parameters like system message, max new tokens, temperature, and top-p.
+""")
+
+# Sidebar for model selection and parameters
+with st.sidebar:
+    st.header("Options")
+    model_choice = st.radio(
+        "Choose a Model",
+        options=list(MODELS.keys()),
+        index=0
+    )
+    st.header("Optional Parameters")
+    system_message = st.text_area(
+        "System Message",
+        value="You are a friendly Chatbot created by ruslanmv.com",
+        height=100
+    )
+    max_new_tokens = st.slider(
+        "Max New Tokens",
+        min_value=1,
+        max_value=4000,
+        value=200
+    )
+    temperature = st.slider(
+        "Temperature",
+        min_value=0.10,
+        max_value=4.00,
+        value=0.70
+    )
+    top_p = st.slider(
+        "Top-p (nucleus sampling)",
+        min_value=0.10,
+        max_value=1.00,
+        value=0.90
    )

-
-
-
-    msg = gr.Textbox(label="Your Message", placeholder="Type your message here...")
-    with gr.Row():
-        submit_btn = gr.Button("Submit", variant="primary")
-        clear_btn = gr.ClearButton([msg, chatbot_output])
+# Initialize chat history
+if "chat_history" not in st.session_state:
+    st.session_state.chat_history = []

-
-
-
-
-
-
-    )
-    with gr.Accordion("Optional Parameters", open=False):
-        system_message = gr.Textbox(
-            label="System Message",
-            value="You are a friendly Chatbot created by ruslanmv.com",
-            lines=2,
-        )
-        max_new_tokens = gr.Slider(
-            minimum=1, maximum=4000, value=200, label="Max New Tokens"
-        )
-        temperature = gr.Slider(
-            minimum=0.10, maximum=4.00, value=0.70, label="Temperature"
-        )
-        top_p = gr.Slider(
-            minimum=0.10, maximum=1.00, value=0.90, label="Top-p (nucleus sampling)"
-        )
+# Display chat history
+for user_msg, assistant_msg in st.session_state.chat_history:
+    with st.chat_message("user"):
+        st.write(user_msg)
+    with st.chat_message("assistant"):
+        st.write(assistant_msg)

-
+# Input box for user message
+user_input = st.chat_input("Type your message here...")

-
-
-
-
-
+# Handle user input
+if user_input:
+    # Add user message to chat history
+    st.session_state.chat_history = chatbot(
+        user_input,
+        st.session_state.chat_history,
+        model_choice,
+        system_message,
+        max_new_tokens,
+        temperature,
+        top_p
    )
-
-
-        [chatbot_output, chat_history, msg]
-    )
-
-if __name__ == "__main__":
-    app.launch()
+
+# Rerun to update the chat display
+st.rerun()
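A note on the `MODELS` table added at the top of the new file: since `load_hf_model` only formats a URL string, the `@lru_cache` is effectively a no-op, and each model name is repeated as both key and argument. A more compact construction would be a dict comprehension (a sketch, not part of the commit):

# Sketch only: the same MODELS mapping without repeating each name.
MODEL_NAMES = ["DeepSeek-R1-Distill-Qwen-32B", "DeepSeek-R1", "DeepSeek-R1-Zero"]
MODELS = {name: load_hf_model(name) for name in MODEL_NAMES}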
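One caveat on the new payload: the serverless Inference API's text-generation endpoint conventionally takes `inputs` as a single prompt string, with sampling options under a separate `parameters` key, rather than the nested messages object used above, so the committed shape may come back as an error. A sketch of the conventional shape, assuming the system message and user turn are flattened into one prompt (the template is illustrative, not from the commit):

# Sketch, assuming the standard text-generation request shape.
payload = {
    "inputs": f"{system_message}\n\nUser: {input_text}\nAssistant:",
    "parameters": {
        "max_new_tokens": max_new_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "return_full_text": False,  # return only the completion, not the echoed prompt
    },
}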
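The `requests.post(api_url, headers=headers, json=payload).json()` call also runs with no timeout and no status check, and the serverless API signals a cold-starting model with a JSON `error` field rather than a `generated_text`. A slightly more defensive version of that step might look like this (a sketch; the surrounding `try/except` still catches anything else):

# Sketch: the same request with a timeout and explicit error reporting.
resp = requests.post(api_url, headers=headers, json=payload, timeout=60)
response = resp.json()
if isinstance(response, dict) and "error" in response:
    # e.g. "Model ... is currently loading" while the model cold-starts
    assistant_response = f"API error: {response['error']}"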
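Finally, `st.secrets['HUGGINGFACE_TOKEN']` assumes the token has actually been configured: on a Space, as a secret named `HUGGINGFACE_TOKEN` in the Space settings; for a local run, in `.streamlit/secrets.toml`. If it is missing, the lookup fails inside the `try` block and every reply falls through to the exception path instead of reaching the model.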