Daemontatox committed
Commit edaf4b6 · verified · 1 Parent(s): c7c8ce5

Update app.py

Files changed (1):
  1. app.py +23 -31
app.py CHANGED
@@ -11,9 +11,9 @@ from openai import OpenAI  # Use the OpenAI client that supports multimodal messages
 # Load API key from environment variable (secrets)
 HF_API_KEY = os.getenv("OPENAI_TOKEN")
 if not HF_API_KEY:
-    raise ValueError("HF_API_KEY environment variable not set")
+    raise ValueError("OPENAI_TOKEN environment variable not set")
 
-# Create the client pointing to the Hugging Face Inference endpoint
+# Create the client pointing to the inference endpoint (e.g., OpenRouter)
 client = OpenAI(
     base_url="https://openrouter.ai/api/v1",
     api_key=HF_API_KEY
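
Note: the corrected error message now names the variable that is actually read (OPENAI_TOKEN), though the Python variable holding it is still called HF_API_KEY. A minimal sketch of the same setup with a small lookup helper, assuming one wanted to accept more than one key name; the helper and the OPENROUTER_API_KEY fallback are illustrative, not part of this app:

    import os
    from openai import OpenAI

    def read_api_key(*names: str) -> str:
        # Illustrative helper: return the first non-empty variable among `names`.
        for name in names:
            value = os.getenv(name)
            if value:
                return value
        raise ValueError(f"None of {names} are set")

    client = OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=read_api_key("OPENAI_TOKEN", "OPENROUTER_API_KEY"),
    )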
@@ -50,15 +50,13 @@ def process_pdf_file(file_path):
             page = doc[page_num]
             page_text = page.get_text("text")
             if page_text.strip():
-                text += f"Page {page_num + 1}:\n{page_text}\n\n"
-
+                text += f"Page {page_num+1}:\n{page_text}\n\n"
             # Render page as an image with a zoom factor
             zoom = 3
             mat = fitz.Matrix(zoom, zoom)
             pix = page.get_pixmap(matrix=mat, alpha=False)
             img_data = pix.tobytes("png")
             img = Image.open(io.BytesIO(img_data)).convert("RGB")
-
             # Resize if image is too large
             max_size = 1600
             if max(img.size) > max_size:
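
Note: the hunk cuts off just before the resize branch. For context, a self-contained sketch of the full render-and-downscale step, assuming the resize uses Pillow; the helper name and the thumbnail() call are assumptions, since the actual resize code lies outside this hunk:

    import io
    import fitz  # PyMuPDF
    from PIL import Image

    def render_page(page: fitz.Page, zoom: int = 3, max_size: int = 1600) -> Image.Image:
        # Render at `zoom`x resolution, then downscale if either side exceeds max_size.
        pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), alpha=False)
        img = Image.open(io.BytesIO(pix.tobytes("png"))).convert("RGB")
        if max(img.size) > max_size:
            img.thumbnail((max_size, max_size), Image.LANCZOS)  # keeps aspect ratio
        return img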
@@ -140,31 +138,24 @@ predetermined_prompts = {
 # -------------------------------
 def chat_respond(user_message, history, prompt_option):
     """
-    Append the user message (or, if starting a new conversation and no message is provided,
-    use the predetermined prompt) to the conversation history; build the API call using
-    the full conversation history (and the image if available); stream back the assistant response
-    while updating the history.
-
+    Append the user message to the conversation history and call the API.
+    In case of an API error (such as unauthorized access), return an error message.
     The history is a list of [user_text, assistant_text] pairs.
     """
-    # If this is the first message, add the predetermined prompt text.
+    # If this is the first message and no message is provided, use the predetermined prompt.
     if history == []:
-        # If user_message is empty, use the predetermined prompt.
         if not user_message.strip():
             user_message = predetermined_prompts.get(prompt_option, "Hello")
         else:
-            # Optionally, prepend the predetermined prompt.
             user_message = predetermined_prompts.get(prompt_option, "") + "\n" + user_message
 
-    # Append the new user message with an empty assistant response.
     history = history + [[user_message, ""]]
 
-    # Build the messages list (for the multimodal API) from the conversation history.
+    # Build the messages list for the multimodal API from the conversation history.
     messages = []
     for i, (user_msg, assistant_msg) in enumerate(history):
-        # For the user message:
         user_content = [{"type": "text", "text": user_msg}]
-        # For the very first user message, if an image was uploaded, append the image.
+        # For the very first user message, attach the image if available.
         if i == 0 and doc_state.current_doc_images:
             buffered = io.BytesIO()
             doc_state.current_doc_images[0].save(buffered, format="PNG")
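
Note: between this hunk and the next, the PNG bytes are evidently base64-encoded into the data_uri used below. A sketch of that step as a standalone helper; the function name is illustrative, and the content-part shape matches the image_url entry visible in the next hunk:

    import base64
    import io
    from PIL import Image

    def image_content_part(img: Image.Image) -> dict:
        # Encode a PIL image as a base64 PNG data URI, OpenAI-style content part.
        buffered = io.BytesIO()
        img.save(buffered, format="PNG")
        b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
        return {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}}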
@@ -175,34 +166,31 @@ def chat_respond(user_message, history, prompt_option):
                 "image_url": {"url": data_uri}
             })
         messages.append({"role": "user", "content": user_content})
-        # For the assistant response, if available.
         if assistant_msg:
             messages.append({
                 "role": "assistant",
                 "content": [{"type": "text", "text": assistant_msg}]
             })
 
-    # Call the inference API with streaming enabled.
+    # Try to call the API with streaming enabled.
     try:
         stream = client.chat.completions.create(
-            model="google/gemini-2.0-pro-exp-02-05:free",
+            model="google/gemini-2.0-flash-lite-preview-02-05:free",
             messages=messages,
             max_tokens=8192,
             stream=True
         )
     except Exception as e:
         logger.error(f"Error calling the API: {str(e)}")
-        history[-1][1] = "An error occurred while processing your request. Please try again."
+        history[-1][1] = "An error occurred while processing your request. Please check your API credentials."
         yield history, history
+        return
 
-    # Stream and update the assistant's reply token by token.
     buffer = ""
     for chunk in stream:
         delta = chunk.choices[0].delta.content
         buffer += delta
-        # Update the assistant part of the latest message in the history.
         history[-1][1] = buffer
-        # Yield the updated chat history (for the Chatbot component) and the state.
         yield history, history
         time.sleep(0.01)
 
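
Note: the added return is the key fix in this hunk. Without it, the generator falls through to `for chunk in stream` after the error yield and raises NameError, since stream was never assigned. A further hardening worth sketching: in OpenAI-style streams, delta.content can be None (for example on role-only or final chunks), so the unguarded `buffer += delta` can raise TypeError. The `or ""` guard below is a suggestion, not part of this commit:

    def stream_reply(stream, history):
        # Hypothetical extraction of the streaming loop, with a None guard.
        buffer = ""
        for chunk in stream:
            delta = chunk.choices[0].delta.content or ""  # content may be None
            buffer += delta
            history[-1][1] = buffer  # update the latest assistant slot
            yield history, history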
 
@@ -212,11 +200,11 @@ def chat_respond(user_message, history, prompt_option):
 # Create the Gradio Interface
 # -------------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("# Vision Software Testing Chatbot")
+    gr.Markdown("# Document Analyzer & Software Testing Chatbot")
     gr.Markdown(
         "Upload a PDF or an image (PNG, JPG, JPEG, GIF, BMP, WEBP). Then choose a prompt from the dropdown. "
         "For example, select **Software Tester** to have the bot analyze an image of a software interface "
-        "and generate test cases. Chat with the bot in the conversation below."
+        "and generate test cases. You can also chat with the model—the conversation history is preserved."
     )
 
     with gr.Row():
@@ -230,6 +218,7 @@ with gr.Blocks() as demo:
230
  prompt_dropdown = gr.Dropdown(
231
  label="Select Prompt",
232
  choices=[
 
233
  "Software Tester"
234
  ],
235
  value="Software Tester"
@@ -244,16 +233,19 @@ with gr.Blocks() as demo:
 
     # State to hold the conversation history
     chat_state = gr.State([])
-
+
     # When a file is uploaded, process it.
     file_upload.change(fn=process_uploaded_file, inputs=file_upload, outputs=upload_status)
 
-    # Clear both the document context and chat history.
+    # Clear both the document context and the chat history.
     clear_btn.click(fn=clear_context, outputs=[upload_status, chat_state])
 
     # When the user clicks Send, process the message and update the chat.
-    send_btn.click(fn=chat_respond,
-                   inputs=[user_input, chat_state, prompt_dropdown],
-                   outputs=[chatbot, chat_state])
+    send_btn.click(
+        fn=chat_respond,
+        inputs=[user_input, chat_state, prompt_dropdown],
+        outputs=[chatbot, chat_state],
+        stream=True
+    )
 
 demo.launch(debug=True)
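
Note: chat_respond is a generator, and Gradio streams each yielded (chatbot, state) pair to the UI on its own. As far as I know, Gradio event listeners such as .click() do not accept a stream keyword, so the stream=True added here may raise a TypeError depending on the installed version. A minimal wiring sketch without it, assuming Gradio 4.x:

    send_btn.click(
        fn=chat_respond,
        inputs=[user_input, chat_state, prompt_dropdown],
        outputs=[chatbot, chat_state],
    )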
 