qqwjq1981 committed on
Commit
311f05f
·
verified ·
1 Parent(s): 94b8a22

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -1,8 +1,8 @@
1
- # app.py
2
  import gradio as gr
3
  from transformers import AutoModelForVision2Seq, AutoProcessor
4
  import torch
5
  from PIL import Image
 
6
 
7
  # Load Qwen-VL model and processor
8
  model_id = "Qwen/Qwen-VL-Chat"
@@ -11,6 +11,9 @@ model = AutoModelForVision2Seq.from_pretrained(model_id, torch_dtype=torch.float
11
 
12
  # Inference function
13
  def ocr_with_qwen(image):
 
 
 
14
  prompt = "<|im_start|>system\nYou are a helpful assistant. Extract all text from the image and output only the text.<|im_end|>\n<|im_start|>user\n"
15
  inputs = processor(images=image, text=prompt, return_tensors="pt").to(model.device)
16
  outputs = model.generate(**inputs, max_new_tokens=512)
@@ -20,8 +23,9 @@ def ocr_with_qwen(image):
20
  # Gradio UI
21
  gr.Interface(
22
  fn=ocr_with_qwen,
23
- inputs=gr.Image(type="pil", label="Upload Image (test.jpg)"),
24
  outputs=gr.Textbox(label="Extracted Text"),
25
  title="OCR with Qwen2.5-VL",
26
- description="Upload an image to extract text using Qwen-VL model."
 
27
  ).launch()
 
 
1
  import gradio as gr
2
  from transformers import AutoModelForVision2Seq, AutoProcessor
3
  import torch
4
  from PIL import Image
5
+ import os
6
 
7
  # Load Qwen-VL model and processor
8
  model_id = "Qwen/Qwen-VL-Chat"
 
11
 
12
  # Inference function
13
  def ocr_with_qwen(image):
14
+ if image is None:
15
+ image = Image.open("test.png")
16
+
17
  prompt = "<|im_start|>system\nYou are a helpful assistant. Extract all text from the image and output only the text.<|im_end|>\n<|im_start|>user\n"
18
  inputs = processor(images=image, text=prompt, return_tensors="pt").to(model.device)
19
  outputs = model.generate(**inputs, max_new_tokens=512)
 
23
  # Gradio UI
24
  gr.Interface(
25
  fn=ocr_with_qwen,
26
+ inputs=gr.Image(type="pil", label="Upload Image (defaults to test.png if none uploaded)", optional=True),
27
  outputs=gr.Textbox(label="Extracted Text"),
28
  title="OCR with Qwen2.5-VL",
29
+ description="Upload an image to extract text using Qwen-VL model. If no image is uploaded, test.png is used.",
30
+ examples=[["test.png"]]
31
  ).launch()