Spaces:

MawaredHR
/

Vision_tester

Running

App Files Files Community

Daemontatox committed on Feb 11

Commit

2ea23a7

verified ·

1 Parent(s): 8adc570

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -28

app.py CHANGED Viewed

@@ -125,7 +125,24 @@ def clear_context():
 # Predetermined Prompts
 # -------------------------------
 predetermined_prompts = {
     "Software Tester": (
         "Act as a software tester. Analyze the uploaded image of a software interface and generate comprehensive "
         "test cases for its features. For each feature, provide test steps, expected results, and any necessary "
@@ -138,10 +155,14 @@ predetermined_prompts = {
 # -------------------------------
 def chat_respond(user_message, history, prompt_option):
     """
-    Append the user message to the conversation history, call the API, and return the full response.
     The conversation history is a list of [user_text, assistant_text] pairs.
     """
-    # If this is the first message and none is provided, use the predetermined prompt.
     if history == []:
         if not user_message.strip():
             user_message = predetermined_prompts.get(prompt_option, "Hello")
@@ -150,47 +171,42 @@ def chat_respond(user_message, history, prompt_option):
     history = history + [[user_message, ""]]
-    # Build the messages list for the multimodal API
     messages = []
     for i, (user_msg, assistant_msg) in enumerate(history):
-        user_content = [{"type": "text", "text": user_msg}]
-        # For the very first message, attach the image (if available)
         if i == 0 and doc_state.current_doc_images:
             buffered = io.BytesIO()
             doc_state.current_doc_images[0].save(buffered, format="PNG")
             img_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
             data_uri = f"data:image/png;base64,{img_b64}"
-            user_content.append({
-                "type": "image_url",
-                "image_url": {"url": data_uri}
-            })
-        messages.append({"role": "user", "content": user_content})
         if assistant_msg:
-            messages.append({
-                "role": "assistant",
-                "content": [{"type": "text", "text": assistant_msg}]
-            })
-    # Call the API (using stream=True internally but waiting for the full response)
     try:
-        stream = client.chat.completions.create(
-            model="google/gemini-2.0-pro-exp-02-05:free",
             messages=messages,
-            max_tokens=8192,
-            stream=True
         )
     except Exception as e:
         logger.error(f"Error calling the API: {str(e)}")
         history[-1][1] = "An error occurred while processing your request. Please check your API credentials."
         return history, history
-    # Gather the full response from the streaming generator
-    buffer = ""
-    for chunk in stream:
-        delta = chunk.choices[0].delta.content
-        buffer += delta
-    history[-1][1] = buffer
     return history, history
 # -------------------------------
@@ -215,14 +231,17 @@ with gr.Blocks() as demo:
         prompt_dropdown = gr.Dropdown(
             label="Select Prompt",
             choices=[
                 "Software Tester"
             ],
             value="Software Tester"
         )
         clear_btn = gr.Button("Clear Document Context & Chat History")
-    # Set type='messages' to avoid deprecation warnings
     chatbot = gr.Chatbot(label="Chat History", type="messages", elem_id="chatbot")
     with gr.Row():

 # Predetermined Prompts
 # -------------------------------
 predetermined_prompts = {
+    "NOC Timesheet": (
+        "Extract structured information from the provided timesheet. The extracted details should include:\n"
+        "Name, Position Title, Work Location, Contractor, NOC ID, Month and Year, Regular Service Days, "
+        "Standby Days, Offshore Days, Extended Hitch Days, and approvals. Format the output as valid JSON."
+    ),
+    "Aramco Full structured": (
+        "You are a document parsing assistant designed to extract structured data from various documents such as "
+        "invoices, timesheets, purchase orders, and travel bookings. Return only valid JSON with no extra text."
+    ),
+    "Aramco Timesheet only": (
+        "Extract time tracking, work details, and approvals. Return a JSON object following the specified structure."
+    ),
+    "NOC Invoice": (
+        "You are a highly accurate data extraction system. Analyze the provided invoice image and extract all data "
+        "into the following JSON format:\n"
+        "{\n  'invoiceDetails': { ... },\n  'from': { ... },\n  'to': { ... },\n  'services': [ ... ],\n  "
+        "'totals': { ... },\n  'bankDetails': { ... }\n}"
+    ),
     "Software Tester": (
         "Act as a software tester. Analyze the uploaded image of a software interface and generate comprehensive "
         "test cases for its features. For each feature, provide test steps, expected results, and any necessary "
 # -------------------------------
 def chat_respond(user_message, history, prompt_option):
     """
     Append the user message to the conversation history, call the API,
     and return the updated history.

     Each message passed to the API is a dictionary with a 'role' and a string
     'content'. If an image was uploaded, its PNG data URI is appended to the
     text of the first user message.

     Args:
         user_message: Text typed by the user. When it is empty/blank (or None)
             and there is no history yet, the predetermined prompt selected by
             ``prompt_option`` is used instead (falling back to "Hello").
         history: The conversation so far, a list of
             [user_text, assistant_text] pairs. None is treated as empty.
             The list passed in is not mutated; a new list is returned.
         prompt_option: Key into ``predetermined_prompts``.

     Returns:
         A ``(history, history)`` tuple: the updated history, twice. The last
         pair holds the assistant reply, or an error message if the API call
         or the response extraction failed.
     """
     history = history or []
     user_message = user_message or ""

     # On the first message, if none is provided, use the predetermined prompt.
     if not history and not user_message.strip():
         user_message = predetermined_prompts.get(prompt_option, "Hello")

     # Build a new list so the caller's history object is left untouched.
     history = history + [[user_message, ""]]

     # Build the messages list with each message as a dictionary containing
     # a role and a string content.
     messages = []
     for i, (user_msg, assistant_msg) in enumerate(history):
         text_to_send = user_msg
         # For the very first user message, attach the image (if available)
         # by appending its data URI to the text.
         # NOTE(review): the image is sent as plain text inside the prompt, not
         # as a structured image part; a vision model presumably will not
         # decode it as an image -- confirm against the provider's API.
         if i == 0 and doc_state.current_doc_images:
             buffered = io.BytesIO()
             doc_state.current_doc_images[0].save(buffered, format="PNG")
             img_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
             data_uri = f"data:image/png;base64,{img_b64}"
             text_to_send = user_msg + "\n[Attached Image: " + data_uri + "]"
         messages.append({"role": "user", "content": text_to_send})
         # The pair just appended has an empty assistant slot, so it is skipped.
         if assistant_msg:
             messages.append({"role": "assistant", "content": assistant_msg})

     try:
         # Call the API without streaming.
         response = client.chat.completions.create(
             model="qwen/qwen-vl-plus:free",
             messages=messages,
             max_tokens=500
         )
     except Exception as e:
         # Boundary with the remote service: report any failure in the chat.
         logger.error("Error calling the API: %s", e)
         history[-1][1] = "An error occurred while processing your request. Please check your API credentials."
         return history, history

     # Extract the assistant's reply. Attribute access is used because the
     # message object of an OpenAI-style client is not subscriptable
     # (assumes `client` is such a client -- confirm where it is created).
     try:
         full_response = response.choices[0].message.content
     except (AttributeError, IndexError, TypeError) as e:
         # Covers a missing/empty `choices` list or an unexpected response shape.
         logger.error("Error extracting API response: %s", e)
         full_response = "An error occurred while processing the API response."

     # Keep the history textual even if the API returned no content.
     if full_response is None:
         full_response = ""

     history[-1][1] = full_response
     return history, history
 # -------------------------------
         prompt_dropdown = gr.Dropdown(
             label="Select Prompt",
             choices=[
+                "NOC Timesheet",
+                "Aramco Full structured",
+                "Aramco Timesheet only",
+                "NOC Invoice",
                 "Software Tester"
             ],
             value="Software Tester"
         )
         clear_btn = gr.Button("Clear Document Context & Chat History")
+    # Set type='messages' to avoid deprecation warnings.
     chatbot = gr.Chatbot(label="Chat History", type="messages", elem_id="chatbot")
     with gr.Row():