prithivMLmods committed on
Commit
a649be3
·
verified ·
1 Parent(s): 5d4f983

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -71
app.py CHANGED
@@ -19,7 +19,6 @@ from transformers import (
19
  TextIteratorStreamer,
20
  )
21
  from transformers.image_utils import load_image
22
- from pdf2image import convert_from_path
23
 
24
  # Constants for text generation
25
  MAX_MAX_NEW_TOKENS = 2048
@@ -85,16 +84,6 @@ def downsample_video(video_path):
85
  vidcap.release()
86
  return frames
87
 
88
- # Function to convert PDF to image
89
- def pdf_to_image(pdf_path):
90
- """
91
- Converts a single-page PDF to a PIL image.
92
- """
93
- images = convert_from_path(pdf_path)
94
- if not images:
95
- raise ValueError("Failed to convert PDF to image.")
96
- return images[0] # Return the first page
97
-
98
  # Function to generate text responses based on image input
99
  @spaces.GPU
100
  def generate_image(model_name: str,
@@ -240,37 +229,7 @@ def generate_video(model_name: str,
240
  time.sleep(0.01)
241
  yield buffer, buffer
242
 
243
- # Function to generate text responses based on PDF input
244
- @spaces.GPU
245
- def generate_pdf(model_name: str,
246
- text: str,
247
- pdf_path: str,
248
- max_new_tokens: int = 1024,
249
- temperature: float = 0.6,
250
- top_p: float = 0.9,
251
- top_k: int = 50,
252
- repetition_penalty: float = 1.2):
253
- """
254
- Generates responses using the selected model for single-page PDF input by converting it to an image.
255
- """
256
- try:
257
- image = pdf_to_image(pdf_path)
258
- except Exception as e:
259
- yield f"Error converting PDF to image: {str(e)}", f"Error converting PDF to image: {str(e)}"
260
- return
261
- yield from generate_image(model_name, text, image, max_new_tokens, temperature, top_p, top_k, repetition_penalty)
262
-
263
- # Function to save the output text to a Markdown file
264
- def save_to_md(output_text):
265
- """
266
- Saves the output text to a Markdown file and returns the file path for download.
267
- """
268
- file_path = f"result_{uuid.uuid4()}.md"
269
- with open(file_path, "w") as f:
270
- f.write(output_text)
271
- return file_path
272
-
273
- # Define examples for image, video, and PDF inference
274
  image_examples = [
275
  ["Solve the problem to find the value.", "images/1.jpg"],
276
  ["Explain the scene.", "images/6.JPG"],
@@ -283,12 +242,6 @@ image_examples = [
283
  video_examples = [
284
  ["Explain the video in detail.", "videos/1.mp4"],
285
  ["Explain the video in detail.", "videos/2.mp4"]
286
-
287
- ]
288
-
289
- pdf_examples = [
290
- ["Explain the content briefly.", "pdfs/1.pdf"],
291
- ["What is the content about?", "pdfs/2.pdf"]
292
  ]
293
 
294
  # Added CSS to style the output area as a "Canvas"
@@ -333,15 +286,6 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
333
  elem_classes="submit-btn")
334
  gr.Examples(examples=video_examples,
335
  inputs=[video_query, video_upload])
336
- with gr.TabItem("PDF Inference"):
337
- pdf_query = gr.Textbox(
338
- label="Query Input",
339
- placeholder="Enter your query here...")
340
- pdf_upload = gr.File(label="Single Page PDF", type="filepath")
341
- pdf_submit = gr.Button("Submit",
342
- elem_classes="submit-btn")
343
- gr.Examples(examples=pdf_examples,
344
- inputs=[pdf_query, pdf_upload])
345
 
346
  with gr.Accordion("Advanced options", open=False):
347
  max_new_tokens = gr.Slider(label="Max new tokens",
@@ -411,20 +355,6 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
411
  repetition_penalty
412
  ],
413
  outputs=[output, markdown_output])
414
- pdf_submit.click(fn=generate_pdf,
415
- inputs=[
416
- model_choice, pdf_query, pdf_upload,
417
- max_new_tokens, temperature, top_p, top_k,
418
- repetition_penalty
419
- ],
420
- outputs=[output, markdown_output])
421
-
422
- # Uncomment the following lines to enable download functionality(ps:no needed for now)
423
- #download_btn.click(
424
- # fn=save_to_md,
425
- # inputs=output,
426
- # outputs=None
427
- #)
428
 
429
  if __name__ == "__main__":
430
  demo.queue(max_size=30).launch(share=True, mcp_server=True, ssr_mode=False, show_error=True)
 
19
  TextIteratorStreamer,
20
  )
21
  from transformers.image_utils import load_image
 
22
 
23
  # Constants for text generation
24
  MAX_MAX_NEW_TOKENS = 2048
 
84
  vidcap.release()
85
  return frames
86
 
 
 
 
 
 
 
 
 
 
 
87
  # Function to generate text responses based on image input
88
  @spaces.GPU
89
  def generate_image(model_name: str,
 
229
  time.sleep(0.01)
230
  yield buffer, buffer
231
 
232
+ # Define examples for image and video inference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  image_examples = [
234
  ["Solve the problem to find the value.", "images/1.jpg"],
235
  ["Explain the scene.", "images/6.JPG"],
 
242
  video_examples = [
243
  ["Explain the video in detail.", "videos/1.mp4"],
244
  ["Explain the video in detail.", "videos/2.mp4"]
 
 
 
 
 
 
245
  ]
246
 
247
  # Added CSS to style the output area as a "Canvas"
 
286
  elem_classes="submit-btn")
287
  gr.Examples(examples=video_examples,
288
  inputs=[video_query, video_upload])
 
 
 
 
 
 
 
 
 
289
 
290
  with gr.Accordion("Advanced options", open=False):
291
  max_new_tokens = gr.Slider(label="Max new tokens",
 
355
  repetition_penalty
356
  ],
357
  outputs=[output, markdown_output])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
 
359
  if __name__ == "__main__":
360
  demo.queue(max_size=30).launch(share=True, mcp_server=True, ssr_mode=False, show_error=True)