Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -19,7 +19,6 @@ from transformers import (
     TextIteratorStreamer,
 )
 from transformers.image_utils import load_image
-from pdf2image import convert_from_path
 
 # Constants for text generation
 MAX_MAX_NEW_TOKENS = 2048
@@ -85,16 +84,6 @@ def downsample_video(video_path):
     vidcap.release()
     return frames
 
-# Function to convert PDF to image
-def pdf_to_image(pdf_path):
-    """
-    Converts a single-page PDF to a PIL image.
-    """
-    images = convert_from_path(pdf_path)
-    if not images:
-        raise ValueError("Failed to convert PDF to image.")
-    return images[0]  # Return the first page
-
 # Function to generate text responses based on image input
 @spaces.GPU
 def generate_image(model_name: str,
@@ -240,37 +229,7 @@ def generate_video(model_name: str,
         time.sleep(0.01)
         yield buffer, buffer
 
-#
-@spaces.GPU
-def generate_pdf(model_name: str,
-                 text: str,
-                 pdf_path: str,
-                 max_new_tokens: int = 1024,
-                 temperature: float = 0.6,
-                 top_p: float = 0.9,
-                 top_k: int = 50,
-                 repetition_penalty: float = 1.2):
-    """
-    Generates responses using the selected model for single-page PDF input by converting it to an image.
-    """
-    try:
-        image = pdf_to_image(pdf_path)
-    except Exception as e:
-        yield f"Error converting PDF to image: {str(e)}", f"Error converting PDF to image: {str(e)}"
-        return
-    yield from generate_image(model_name, text, image, max_new_tokens, temperature, top_p, top_k, repetition_penalty)
-
-# Function to save the output text to a Markdown file
-def save_to_md(output_text):
-    """
-    Saves the output text to a Markdown file and returns the file path for download.
-    """
-    file_path = f"result_{uuid.uuid4()}.md"
-    with open(file_path, "w") as f:
-        f.write(output_text)
-    return file_path
-
-# Define examples for image, video, and PDF inference
+# Define examples for image and video inference
 image_examples = [
     ["Solve the problem to find the value.", "images/1.jpg"],
     ["Explain the scene.", "images/6.JPG"],
@@ -283,12 +242,6 @@ image_examples = [
 video_examples = [
     ["Explain the video in detail.", "videos/1.mp4"],
     ["Explain the video in detail.", "videos/2.mp4"]
-
-]
-
-pdf_examples = [
-    ["Explain the content briefly.", "pdfs/1.pdf"],
-    ["What is the content about?", "pdfs/2.pdf"]
 ]
 
 # Added CSS to style the output area as a "Canvas"
@@ -333,15 +286,6 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
                                        elem_classes="submit-btn")
                 gr.Examples(examples=video_examples,
                             inputs=[video_query, video_upload])
-            with gr.TabItem("PDF Inference"):
-                pdf_query = gr.Textbox(
-                    label="Query Input",
-                    placeholder="Enter your query here...")
-                pdf_upload = gr.File(label="Single Page PDF", type="filepath")
-                pdf_submit = gr.Button("Submit",
-                                       elem_classes="submit-btn")
-                gr.Examples(examples=pdf_examples,
-                            inputs=[pdf_query, pdf_upload])
 
         with gr.Accordion("Advanced options", open=False):
             max_new_tokens = gr.Slider(label="Max new tokens",
@@ -411,20 +355,6 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
                              repetition_penalty
                          ],
                          outputs=[output, markdown_output])
-        pdf_submit.click(fn=generate_pdf,
-                         inputs=[
-                             model_choice, pdf_query, pdf_upload,
-                             max_new_tokens, temperature, top_p, top_k,
-                             repetition_penalty
-                         ],
-                         outputs=[output, markdown_output])
-
-    # Uncomment the following lines to enable download functionality(ps:no needed for now)
-    #download_btn.click(
-    #    fn=save_to_md,
-    #    inputs=output,
-    #    outputs=None
-    #)
 
 if __name__ == "__main__":
     demo.queue(max_size=30).launch(share=True, mcp_server=True, ssr_mode=False, show_error=True)
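This commit removes the whole PDF path (pdf_to_image, generate_pdf, the "PDF Inference" tab and its click wiring), leaving only image and video inference. A single-page PDF can still be run through the surviving image pipeline if it is converted to a PIL image on the caller's side first. Below is a minimal sketch of that pre-processing step, assuming pdf2image (and its poppler backend) is installed in the caller's environment; the helper name pdf_first_page_to_image and the illustrated generate_image call simply mirror the code deleted here and are not part of the app after this change.

# Hypothetical client-side helper mirroring the removed pdf_to_image();
# assumes pdf2image and its poppler dependency are available locally.
from pdf2image import convert_from_path
from PIL import Image

def pdf_first_page_to_image(pdf_path: str) -> Image.Image:
    """Convert a single-page PDF to a PIL image (first page only)."""
    pages = convert_from_path(pdf_path)
    if not pages:
        raise ValueError("Failed to convert PDF to image.")
    return pages[0]

# The resulting image can then be fed to the remaining image pipeline,
# e.g. (sketch only, reusing the signature shown in the removed generate_pdf):
# for partial, final in generate_image(model_name, "Explain the content briefly.",
#                                      pdf_first_page_to_image("pdfs/1.pdf")):
#     print(partial)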