Daemontatox committed on
Commit
2ea23a7
·
verified ·
1 Parent(s): 8adc570

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -28
app.py CHANGED
@@ -125,7 +125,24 @@ def clear_context():
125
  # Predetermined Prompts
126
  # -------------------------------
127
  predetermined_prompts = {
128
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  "Software Tester": (
130
  "Act as a software tester. Analyze the uploaded image of a software interface and generate comprehensive "
131
  "test cases for its features. For each feature, provide test steps, expected results, and any necessary "
@@ -138,10 +155,14 @@ predetermined_prompts = {
138
  # -------------------------------
139
  def chat_respond(user_message, history, prompt_option):
140
  """
141
- Append the user message to the conversation history, call the API, and return the full response.
 
 
 
 
142
  The conversation history is a list of [user_text, assistant_text] pairs.
143
  """
144
- # If this is the first message and none is provided, use the predetermined prompt.
145
  if history == []:
146
  if not user_message.strip():
147
  user_message = predetermined_prompts.get(prompt_option, "Hello")
@@ -150,47 +171,42 @@ def chat_respond(user_message, history, prompt_option):
150
 
151
  history = history + [[user_message, ""]]
152
 
153
- # Build the messages list for the multimodal API
154
  messages = []
 
155
  for i, (user_msg, assistant_msg) in enumerate(history):
156
- user_content = [{"type": "text", "text": user_msg}]
157
- # For the very first message, attach the image (if available)
158
  if i == 0 and doc_state.current_doc_images:
159
  buffered = io.BytesIO()
160
  doc_state.current_doc_images[0].save(buffered, format="PNG")
161
  img_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
162
  data_uri = f"data:image/png;base64,{img_b64}"
163
- user_content.append({
164
- "type": "image_url",
165
- "image_url": {"url": data_uri}
166
- })
167
- messages.append({"role": "user", "content": user_content})
168
  if assistant_msg:
169
- messages.append({
170
- "role": "assistant",
171
- "content": [{"type": "text", "text": assistant_msg}]
172
- })
173
 
174
- # Call the API (using stream=True internally but waiting for the full response)
175
  try:
176
- stream = client.chat.completions.create(
177
- model="google/gemini-2.0-pro-exp-02-05:free",
 
178
  messages=messages,
179
- max_tokens=8192,
180
- stream=True
181
  )
182
  except Exception as e:
183
  logger.error(f"Error calling the API: {str(e)}")
184
  history[-1][1] = "An error occurred while processing your request. Please check your API credentials."
185
  return history, history
186
 
187
- # Gather the full response from the streaming generator
188
- buffer = ""
189
- for chunk in stream:
190
- delta = chunk.choices[0].delta.content
191
- buffer += delta
 
192
 
193
- history[-1][1] = buffer
194
  return history, history
195
 
196
  # -------------------------------
@@ -215,14 +231,17 @@ with gr.Blocks() as demo:
215
  prompt_dropdown = gr.Dropdown(
216
  label="Select Prompt",
217
  choices=[
218
-
 
 
 
219
  "Software Tester"
220
  ],
221
  value="Software Tester"
222
  )
223
  clear_btn = gr.Button("Clear Document Context & Chat History")
224
 
225
- # Set type='messages' to avoid deprecation warnings
226
  chatbot = gr.Chatbot(label="Chat History", type="messages", elem_id="chatbot")
227
 
228
  with gr.Row():
 
125
  # Predetermined Prompts
126
  # -------------------------------
127
  predetermined_prompts = {
128
+ "NOC Timesheet": (
129
+ "Extract structured information from the provided timesheet. The extracted details should include:\n"
130
+ "Name, Position Title, Work Location, Contractor, NOC ID, Month and Year, Regular Service Days, "
131
+ "Standby Days, Offshore Days, Extended Hitch Days, and approvals. Format the output as valid JSON."
132
+ ),
133
+ "Aramco Full structured": (
134
+ "You are a document parsing assistant designed to extract structured data from various documents such as "
135
+ "invoices, timesheets, purchase orders, and travel bookings. Return only valid JSON with no extra text."
136
+ ),
137
+ "Aramco Timesheet only": (
138
+ "Extract time tracking, work details, and approvals. Return a JSON object following the specified structure."
139
+ ),
140
+ "NOC Invoice": (
141
+ "You are a highly accurate data extraction system. Analyze the provided invoice image and extract all data "
142
+ "into the following JSON format:\n"
143
+ "{\n 'invoiceDetails': { ... },\n 'from': { ... },\n 'to': { ... },\n 'services': [ ... ],\n "
144
+ "'totals': { ... },\n 'bankDetails': { ... }\n}"
145
+ ),
146
  "Software Tester": (
147
  "Act as a software tester. Analyze the uploaded image of a software interface and generate comprehensive "
148
  "test cases for its features. For each feature, provide test steps, expected results, and any necessary "
 
155
  # -------------------------------
156
  def chat_respond(user_message, history, prompt_option):
157
  """
158
+ Append the user message to the conversation history, call the API,
159
+ and return the full response.
160
+
161
+ Each message passed to the API is now a dictionary with a string value for 'content'.
162
+ If an image was uploaded, its data URI is appended to the first user message.
163
  The conversation history is a list of [user_text, assistant_text] pairs.
164
  """
165
+ # On the first message, if none is provided, use the predetermined prompt.
166
  if history == []:
167
  if not user_message.strip():
168
  user_message = predetermined_prompts.get(prompt_option, "Hello")
 
171
 
172
  history = history + [[user_message, ""]]
173
 
 
174
  messages = []
175
+ # Build the messages list with each message as a dictionary containing role and a string content.
176
  for i, (user_msg, assistant_msg) in enumerate(history):
177
+ # For the very first user message, attach the image (if available) by appending its data URI.
 
178
  if i == 0 and doc_state.current_doc_images:
179
  buffered = io.BytesIO()
180
  doc_state.current_doc_images[0].save(buffered, format="PNG")
181
  img_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
182
  data_uri = f"data:image/png;base64,{img_b64}"
183
+ text_to_send = user_msg + "\n[Attached Image: " + data_uri + "]"
184
+ else:
185
+ text_to_send = user_msg
186
+ messages.append({"role": "user", "content": text_to_send})
 
187
  if assistant_msg:
188
+ messages.append({"role": "assistant", "content": assistant_msg})
 
 
 
189
 
 
190
  try:
191
+ # Call the API without streaming. The messages are now standard dictionaries.
192
+ response = client.chat.completions.create(
193
+ model="qwen/qwen-vl-plus:free",
194
  messages=messages,
195
+ max_tokens=500
 
196
  )
197
  except Exception as e:
198
  logger.error(f"Error calling the API: {str(e)}")
199
  history[-1][1] = "An error occurred while processing your request. Please check your API credentials."
200
  return history, history
201
 
202
+ # Assuming the API returns a standard completion response, extract the assistant's reply.
203
+ try:
204
+ full_response = response.choices[0].message["content"]
205
+ except Exception as e:
206
+ logger.error(f"Error extracting API response: {str(e)}")
207
+ full_response = "An error occurred while processing the API response."
208
 
209
+ history[-1][1] = full_response
210
  return history, history
211
 
212
  # -------------------------------
 
231
  prompt_dropdown = gr.Dropdown(
232
  label="Select Prompt",
233
  choices=[
234
+ "NOC Timesheet",
235
+ "Aramco Full structured",
236
+ "Aramco Timesheet only",
237
+ "NOC Invoice",
238
  "Software Tester"
239
  ],
240
  value="Software Tester"
241
  )
242
  clear_btn = gr.Button("Clear Document Context & Chat History")
243
 
244
+ # Set type='messages' to avoid deprecation warnings.
245
  chatbot = gr.Chatbot(label="Chat History", type="messages", elem_id="chatbot")
246
 
247
  with gr.Row():