Spaces:

gr0010
/

CustomThinker-Demo

Running on Zero

App Files Files Community

gr0010 committed on Aug 30

Commit

0bc74d6

verified ·

1 Parent(s): 10d71a3

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -52

app.py CHANGED Viewed

@@ -111,9 +111,10 @@ custom_css = """
 """
 with gr.Blocks(theme=gr.themes.Soft(), fill_height=True, css=custom_css) as demo:
-    # State for conversation history and generation status
-    conversation_state = gr.State([])  # List of message dictionaries with role/content
-    is_generating_state = gr.State(False)  # To prevent multiple submissions
     # Model info and CTA section
     gr.HTML("""
@@ -154,7 +155,7 @@ Think using bullet points and short sentences to simulate thoughts and emoticons
         bubble_full_width=False,
         height=500,
         show_copy_button=True,
-        type="messages"  # This requires proper message format
     )
     with gr.Row():
@@ -236,17 +237,19 @@ Think using bullet points and short sentences to simulate thoughts and emoticons
                 info="Maximum response length"
             )
-    def handle_user_message(user_message: str, conversation: list, system_prompt_text: str,
-                          is_generating: bool, temp: float, top_p_val: float, top_k_val: int,
                           min_p_val: float, max_tokens: int):
         """
-        Handles user input, updates conversation, and generates the model's response.
         """
         # Prevent multiple submissions
         if is_generating or not user_message.strip():
             return {
-                chatbot: conversation,
-                conversation_state: conversation,
                 is_generating_state: is_generating,
                 user_input: user_message,
                 submit_btn: gr.update(interactive=not is_generating)
@@ -255,13 +258,17 @@ Think using bullet points and short sentences to simulate thoughts and emoticons
         # Set generating state
         is_generating = True
-        # Add user message to conversation
-        conversation.append({"role": "user", "content": user_message.strip()})
         # Yield intermediate state to show user message and disable input
         yield {
-            chatbot: conversation,
-            conversation_state: conversation,
             is_generating_state: is_generating,
             user_input: "",
             submit_btn: gr.update(interactive=False, value="🔄 Generating...")
@@ -271,9 +278,7 @@ Think using bullet points and short sentences to simulate thoughts and emoticons
         messages_for_model = []
         if system_prompt_text.strip():
             messages_for_model.append({"role": "system", "content": system_prompt_text.strip()})
-        # Add conversation history (excluding system messages for model input)
-        messages_for_model.extend([msg for msg in conversation if msg["role"] != "system"])
         try:
             # Generate response with hyperparameters
@@ -286,7 +291,10 @@ Think using bullet points and short sentences to simulate thoughts and emoticons
                 max_new_tokens=max_tokens
             )
-            # Format response for display
             if thinking and thinking.strip():
                 formatted_response = f"""<details>
 <summary><b>🤔 Show Reasoning Process</b></summary>
@@ -299,72 +307,78 @@ Think using bullet points and short sentences to simulate thoughts and emoticons
             else:
                 formatted_response = answer
-            # Add assistant response to conversation
-            conversation.append({"role": "assistant", "content": formatted_response})
         except Exception as e:
             error_msg = f"❌ Error generating response: {str(e)}"
-            conversation.append({"role": "assistant", "content": error_msg})
         # Reset generating state
         is_generating = False
         # Final yield with complete response
         yield {
-            chatbot: conversation,
-            conversation_state: conversation,
             is_generating_state: is_generating,
             user_input: "",
             submit_btn: gr.update(interactive=True, value="Send")
         }
     def clear_history():
-        """Clear conversation history"""
         return {
             chatbot: [],
-            conversation_state: [],
             is_generating_state: False,
             user_input: "",
             submit_btn: gr.update(interactive=True, value="Send")
         }
-    def retry_last(conversation: list, system_prompt_text: str,
                    temp: float, top_p_val: float, top_k_val: int,
                    min_p_val: float, max_tokens: int):
         """
-        Retry the last user message.
         """
-        # Safety check: ensure there is a conversation and the last message was from the assistant
-        if not conversation or conversation[-1]["role"] != "assistant":
             # If nothing to retry, yield the current state and stop
             yield {
-                chatbot: conversation,
-                conversation_state: conversation,
                 is_generating_state: False
             }
             return
-        # Remove the last assistant message
-        conversation.pop()
-        # Get the last user message
-        if conversation and conversation[-1]["role"] == "user":
-            last_user_msg = conversation[-1]["content"]
-            # Remove the user message too, as handle_user_message will add it back
-            conversation.pop()
         else:
             # If no user message found, just return current state
             yield {
-                chatbot: conversation,
-                conversation_state: conversation,
                 is_generating_state: False
             }
             return
         # Use 'yield from' to properly call the generator and pass its updates
         yield from handle_user_message(
-            last_user_msg, conversation, system_prompt_text, False,
-            temp, top_p_val, top_k_val, min_p_val, max_tokens
         )
     def on_input_change(text, is_generating):
@@ -374,31 +388,35 @@ Think using bullet points and short sentences to simulate thoughts and emoticons
     # Event listeners
     submit_event = submit_btn.click(
         handle_user_message,
-        inputs=[user_input, conversation_state, system_prompt, is_generating_state,
-                temperature, top_p, top_k, min_p, max_new_tokens],
-        outputs=[chatbot, conversation_state, is_generating_state, user_input, submit_btn],
         show_progress=True
     )
     submit_event_enter = user_input.submit(
         handle_user_message,
-        inputs=[user_input, conversation_state, system_prompt, is_generating_state,
-                temperature, top_p, top_k, min_p, max_new_tokens],
-        outputs=[chatbot, conversation_state, is_generating_state, user_input, submit_btn],
         show_progress=True
     )
     # Clear button event
     clear_btn.click(
         clear_history,
-        outputs=[chatbot, conversation_state, is_generating_state, user_input, submit_btn]
     )
-    # Retry button event
     retry_btn.click(
         retry_last,
-        inputs=[conversation_state, system_prompt, temperature, top_p, top_k, min_p, max_new_tokens],
-        outputs=[chatbot, conversation_state, is_generating_state],
         show_progress=True
     )

 """
 with gr.Blocks(theme=gr.themes.Soft(), fill_height=True, css=custom_css) as demo:
+    # Separate states for display and model context
+    display_history_state = gr.State([])  # For Gradio chatbot display (with HTML formatting)
+    model_history_state = gr.State([])    # Clean history for model (plain text only)
+    is_generating_state = gr.State(False) # To prevent multiple submissions
     # Model info and CTA section
     gr.HTML("""
         bubble_full_width=False,
         height=500,
         show_copy_button=True,
+        type="messages"
     )
     with gr.Row():
                 info="Maximum response length"
             )
+    def handle_user_message(user_message: str, display_history: list, model_history: list,
+                          system_prompt_text: str, is_generating: bool,
+                          temp: float, top_p_val: float, top_k_val: int,
                           min_p_val: float, max_tokens: int):
         """
+        Handles user input, updates histories, and generates the model's response.
         """
         # Prevent multiple submissions
         if is_generating or not user_message.strip():
             return {
+                chatbot: display_history,
+                display_history_state: display_history,
+                model_history_state: model_history,
                 is_generating_state: is_generating,
                 user_input: user_message,
                 submit_btn: gr.update(interactive=not is_generating)
         # Set generating state
         is_generating = True
+        # Update model history (clean format for model - PLAIN TEXT ONLY)
+        model_history.append({"role": "user", "content": user_message.strip()})
+        # Update display history (for Gradio chatbot)
+        display_history.append({"role": "user", "content": user_message.strip()})
         # Yield intermediate state to show user message and disable input
         yield {
+            chatbot: display_history,
+            display_history_state: display_history,
+            model_history_state: model_history,
             is_generating_state: is_generating,
             user_input: "",
             submit_btn: gr.update(interactive=False, value="🔄 Generating...")
         messages_for_model = []
         if system_prompt_text.strip():
             messages_for_model.append({"role": "system", "content": system_prompt_text.strip()})
+        messages_for_model.extend(model_history)
         try:
             # Generate response with hyperparameters
                 max_new_tokens=max_tokens
             )
+            # Update model history with CLEAN answer (no HTML formatting)
+            model_history.append({"role": "assistant", "content": answer})
+            # Format response for display (with HTML formatting)
             if thinking and thinking.strip():
                 formatted_response = f"""<details>
 <summary><b>🤔 Show Reasoning Process</b></summary>
             else:
                 formatted_response = answer
+            # Update display history with formatted response
+            display_history.append({"role": "assistant", "content": formatted_response})
         except Exception as e:
             error_msg = f"❌ Error generating response: {str(e)}"
+            display_history.append({"role": "assistant", "content": error_msg})
+            # Don't add error to model history to avoid confusing the model
         # Reset generating state
         is_generating = False
         # Final yield with complete response
         yield {
+            chatbot: display_history,
+            display_history_state: display_history,
+            model_history_state: model_history,
             is_generating_state: is_generating,
             user_input: "",
             submit_btn: gr.update(interactive=True, value="Send")
         }
     def clear_history():
         """Reset the chat to its initial, idle state.

         Returns a dict of component updates that empties the chatbot and both
         history states, clears the generating flag and the input box, and
         re-enables the submit button with its default "Send" label.
         """
         # Each target gets its own fresh list so the states never alias each other.
         cleared = {
             chatbot: [],
             display_history_state: [],
             model_history_state: [],
         }
         # Restore the idle input controls alongside the emptied histories.
         cleared[is_generating_state] = False
         cleared[user_input] = ""
         cleared[submit_btn] = gr.update(interactive=True, value="Send")
         return cleared
+    def retry_last(display_history: list, model_history: list, system_prompt_text: str,
                    temp: float, top_p_val: float, top_k_val: int,
                    min_p_val: float, max_tokens: int):
         """
+        Retry the last user message with corrected history and generator handling.
         """
+        # Safety check: ensure there is a history and the last message was from the assistant
+        if not model_history or model_history[-1]["role"] != "assistant":
             # If nothing to retry, yield the current state and stop
             yield {
+                chatbot: display_history,
+                display_history_state: display_history,
+                model_history_state: model_history,
                 is_generating_state: False
             }
             return
+        # Remove the last assistant message from both histories
+        model_history.pop()  # Remove assistant's clean message from model history
+        display_history.pop()  # Remove assistant's formatted message from display history
+        # Get the last user message to resubmit it, then remove it from both histories
+        if model_history and model_history[-1]["role"] == "user":
+            last_user_msg = model_history[-1]["content"]
+            model_history.pop()  # Remove user message from model history
+            display_history.pop()  # Remove user message from display history
         else:
             # If no user message found, just return current state
             yield {
+                chatbot: display_history,
+                display_history_state: display_history,
+                model_history_state: model_history,
                 is_generating_state: False
             }
             return
         # Use 'yield from' to properly call the generator and pass its updates
         yield from handle_user_message(
+            last_user_msg, display_history, model_history,
+            system_prompt_text, False, temp, top_p_val, top_k_val, min_p_val, max_tokens
         )
     def on_input_change(text, is_generating):
     # Event listeners
     submit_event = submit_btn.click(
         handle_user_message,
+        inputs=[user_input, display_history_state, model_history_state, system_prompt,
+                is_generating_state, temperature, top_p, top_k, min_p, max_new_tokens],
+        outputs=[chatbot, display_history_state, model_history_state, is_generating_state,
+                 user_input, submit_btn],
         show_progress=True
     )
     submit_event_enter = user_input.submit(
         handle_user_message,
+        inputs=[user_input, display_history_state, model_history_state, system_prompt,
+                is_generating_state, temperature, top_p, top_k, min_p, max_new_tokens],
+        outputs=[chatbot, display_history_state, model_history_state, is_generating_state,
+                 user_input, submit_btn],
         show_progress=True
     )
     # Clear button event
     clear_btn.click(
         clear_history,
+        outputs=[chatbot, display_history_state, model_history_state, is_generating_state,
+                 user_input, submit_btn]
     )
+    # Retry button event - FIXED OUTPUTS
     retry_btn.click(
         retry_last,
+        inputs=[display_history_state, model_history_state, system_prompt,
+                temperature, top_p, top_k, min_p, max_new_tokens],
+        outputs=[chatbot, display_history_state, model_history_state, is_generating_state],
         show_progress=True
     )