Spaces:

MawaredHR
/

Vision_tester

Running

App Files Files Community

Daemontatox committed on Feb 11

Commit

8adc570

verified ·

1 Parent(s): edaf4b6

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -22

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ from PIL import Image
 import gradio as gr
 from openai import OpenAI  # Use the OpenAI client that supports multimodal messages
-# Load API key from environment variable (secrets)
 HF_API_KEY = os.getenv("OPENAI_TOKEN")
 if not HF_API_KEY:
     raise ValueError("OPENAI_TOKEN environment variable not set")
@@ -82,7 +82,7 @@ def process_uploaded_file(file):
         if file is None:
             return "No file uploaded. Please upload a file."
-        # Get the file path from the Gradio upload (may be a dict or file-like object)
         if isinstance(file, dict):
             file_path = file["name"]
         else:
@@ -125,7 +125,7 @@ def clear_context():
 # Predetermined Prompts
 # -------------------------------
 predetermined_prompts = {
     "Software Tester": (
         "Act as a software tester. Analyze the uploaded image of a software interface and generate comprehensive "
         "test cases for its features. For each feature, provide test steps, expected results, and any necessary "
@@ -134,15 +134,14 @@ predetermined_prompts = {
 }
 # -------------------------------
-# Chat Function with Streaming and Conversation History
 # -------------------------------
 def chat_respond(user_message, history, prompt_option):
     """
-    Append the user message to the conversation history and call the API.
-    In case of an API error (such as unauthorized access), return an error message.
-    The history is a list of [user_text, assistant_text] pairs.
     """
-    # If this is the first message and no message is provided, use the predetermined prompt.
     if history == []:
         if not user_message.strip():
             user_message = predetermined_prompts.get(prompt_option, "Hello")
@@ -151,11 +150,11 @@ def chat_respond(user_message, history, prompt_option):
     history = history + [[user_message, ""]]
-    # Build the messages list for the multimodal API from the conversation history.
     messages = []
     for i, (user_msg, assistant_msg) in enumerate(history):
         user_content = [{"type": "text", "text": user_msg}]
-        # For the very first user message, attach the image if available.
         if i == 0 and doc_state.current_doc_images:
             buffered = io.BytesIO()
             doc_state.current_doc_images[0].save(buffered, format="PNG")
@@ -172,10 +171,10 @@ def chat_respond(user_message, history, prompt_option):
                 "content": [{"type": "text", "text": assistant_msg}]
             })
-    # Try to call the API with streaming enabled.
     try:
         stream = client.chat.completions.create(
-            model="google/gemini-2.0-flash-lite-preview-02-05:free",
             messages=messages,
             max_tokens=8192,
             stream=True
@@ -183,17 +182,15 @@ def chat_respond(user_message, history, prompt_option):
     except Exception as e:
         logger.error(f"Error calling the API: {str(e)}")
         history[-1][1] = "An error occurred while processing your request. Please check your API credentials."
-        yield history, history
-        return
     buffer = ""
     for chunk in stream:
         delta = chunk.choices[0].delta.content
         buffer += delta
-        history[-1][1] = buffer
-        yield history, history
-        time.sleep(0.01)
     return history, history
 # -------------------------------
@@ -218,14 +215,15 @@ with gr.Blocks() as demo:
         prompt_dropdown = gr.Dropdown(
             label="Select Prompt",
             choices=[
                 "Software Tester"
             ],
             value="Software Tester"
         )
         clear_btn = gr.Button("Clear Document Context & Chat History")
-    chatbot = gr.Chatbot(label="Chat History", elem_id="chatbot")
     with gr.Row():
         user_input = gr.Textbox(label="Your Message", placeholder="Type your message here...", show_label=False)
@@ -237,15 +235,14 @@ with gr.Blocks() as demo:
     # When a file is uploaded, process it.
     file_upload.change(fn=process_uploaded_file, inputs=file_upload, outputs=upload_status)
-    # Clear both the document context and the chat history.
     clear_btn.click(fn=clear_context, outputs=[upload_status, chat_state])
     # When the user clicks Send, process the message and update the chat.
     send_btn.click(
         fn=chat_respond,
         inputs=[user_input, chat_state, prompt_dropdown],
-        outputs=[chatbot, chat_state],
-        stream=True
     )
 demo.launch(debug=True)

 import gradio as gr
 from openai import OpenAI  # Use the OpenAI client that supports multimodal messages
+# Load API key from environment variable
 HF_API_KEY = os.getenv("OPENAI_TOKEN")
 if not HF_API_KEY:
     raise ValueError("OPENAI_TOKEN environment variable not set")
         if file is None:
             return "No file uploaded. Please upload a file."
+        # Gradio may pass a dict or a file-like object
         if isinstance(file, dict):
             file_path = file["name"]
         else:
 # Predetermined Prompts
 # -------------------------------
 predetermined_prompts = {
     "Software Tester": (
         "Act as a software tester. Analyze the uploaded image of a software interface and generate comprehensive "
         "test cases for its features. For each feature, provide test steps, expected results, and any necessary "
 }
 # -------------------------------
+# Chat Function (Non-streaming Version)
 # -------------------------------
 def chat_respond(user_message, history, prompt_option):
     """
+    Append the user message to the conversation history, call the API, and return the full response.
+    The conversation history is a list of [user_text, assistant_text] pairs.
     """
+    # If this is the first message and none is provided, use the predetermined prompt.
     if history == []:
         if not user_message.strip():
             user_message = predetermined_prompts.get(prompt_option, "Hello")
     history = history + [[user_message, ""]]
+    # Build the messages list for the multimodal API
     messages = []
     for i, (user_msg, assistant_msg) in enumerate(history):
         user_content = [{"type": "text", "text": user_msg}]
+        # For the very first message, attach the image (if available)
         if i == 0 and doc_state.current_doc_images:
             buffered = io.BytesIO()
             doc_state.current_doc_images[0].save(buffered, format="PNG")
                 "content": [{"type": "text", "text": assistant_msg}]
             })
+    # Call the API (using stream=True internally but waiting for the full response)
     try:
         stream = client.chat.completions.create(
+            model="google/gemini-2.0-pro-exp-02-05:free",
             messages=messages,
             max_tokens=8192,
             stream=True
     except Exception as e:
         logger.error(f"Error calling the API: {str(e)}")
         history[-1][1] = "An error occurred while processing your request. Please check your API credentials."
+        return history, history
+    # Gather the full response from the streaming generator
     buffer = ""
     for chunk in stream:
         delta = chunk.choices[0].delta.content
         buffer += delta
+    history[-1][1] = buffer
     return history, history
 # -------------------------------
         prompt_dropdown = gr.Dropdown(
             label="Select Prompt",
             choices=[
                 "Software Tester"
             ],
             value="Software Tester"
         )
         clear_btn = gr.Button("Clear Document Context & Chat History")
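+    # Set type='messages' to avoid deprecation warnings.
+    # NOTE(review): the 'messages' format expects a list of {"role": ..., "content": ...} dicts,
+    # but chat_respond returns [user_text, assistant_text] pairs — confirm the history is converted.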
+    chatbot = gr.Chatbot(label="Chat History", type="messages", elem_id="chatbot")
     with gr.Row():
         user_input = gr.Textbox(label="Your Message", placeholder="Type your message here...", show_label=False)
     # When a file is uploaded, process it.
     file_upload.change(fn=process_uploaded_file, inputs=file_upload, outputs=upload_status)
+    # Clear document context and chat history.
     clear_btn.click(fn=clear_context, outputs=[upload_status, chat_state])
     # When the user clicks Send, process the message and update the chat.
     send_btn.click(
         fn=chat_respond,
         inputs=[user_input, chat_state, prompt_dropdown],
+        outputs=[chatbot, chat_state]
     )
 demo.launch(debug=True)