prithivMLmods committed on
Commit
9074b0b
·
verified ·
1 Parent(s): e972111

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -1
app.py CHANGED
@@ -12,6 +12,7 @@ import torch
12
  import numpy as np
13
  from PIL import Image
14
  import cv2
 
15
 
16
  from transformers import (
17
  Qwen2VLForConditionalGeneration,
@@ -65,6 +66,15 @@ model_j = Qwen2_5_VLForConditionalGeneration.from_pretrained(
65
  torch_dtype=torch.float16
66
  ).to(device).eval()
67
 
 
 
 
 
 
 
 
 
 
68
  def downsample_video(video_path):
69
  """
70
  Downsamples the video to evenly spaced frames.
@@ -108,6 +118,9 @@ def generate_image(model_name: str, text: str, image: Image.Image,
108
  elif model_name == "Lumian-VLR-7B-Thinking":
109
  processor = processor_j
110
  model = model_j
 
 
 
111
  else:
112
  yield "Invalid model selected.", "Invalid model selected."
113
  return
@@ -164,6 +177,9 @@ def generate_video(model_name: str, text: str, video_path: str,
164
  elif model_name == "Lumian-VLR-7B-Thinking":
165
  processor = processor_j
166
  model = model_j
 
 
 
167
  else:
168
  yield "Invalid model selected.", "Invalid model selected."
169
  return
@@ -286,7 +302,7 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
286
  markdown_output = gr.Markdown(label="(Result.Md)")
287
 
288
  model_choice = gr.Radio(
289
- choices=["Lumian-VLR-7B-Thinking", "DREX-062225-7B-exp", "olmOCR-7B-0225-preview", "Typhoon-OCR-3B"],
290
  label="Select Model",
291
  value="Lumian-VLR-7B-Thinking"
292
  )
 
12
  import numpy as np
13
  from PIL import Image
14
  import cv2
15
+ import requests
16
 
17
  from transformers import (
18
  Qwen2VLForConditionalGeneration,
 
66
  torch_dtype=torch.float16
67
  ).to(device).eval()
68
 
69
+ # Load medgemma-4b-it
70
+ MODEL_ID_F = "google/medgemma-4b-it"
71
+ processor_f = AutoProcessor.from_pretrained(MODEL_ID_F, trust_remote_code=True)
72
+ model_f = AutoModelForImageTextToText.from_pretrained(
73
+ MODEL_ID_F,
74
+ trust_remote_code=True,
75
+ torch_dtype=torch.float16
76
+ ).to(device).eval()
77
+
78
  def downsample_video(video_path):
79
  """
80
  Downsamples the video to evenly spaced frames.
 
118
  elif model_name == "Lumian-VLR-7B-Thinking":
119
  processor = processor_j
120
  model = model_j
121
+ elif model_name == "medgemma-4b-it":
122
+ processor = processor_f
123
+ model = model_f
124
  else:
125
  yield "Invalid model selected.", "Invalid model selected."
126
  return
 
177
  elif model_name == "Lumian-VLR-7B-Thinking":
178
  processor = processor_j
179
  model = model_j
180
+ elif model_name == "medgemma-4b-it":
181
+ processor = processor_f
182
+ model = model_f
183
  else:
184
  yield "Invalid model selected.", "Invalid model selected."
185
  return
 
302
  markdown_output = gr.Markdown(label="(Result.Md)")
303
 
304
  model_choice = gr.Radio(
305
+ choices=["Lumian-VLR-7B-Thinking", "DREX-062225-7B-exp", "olmOCR-7B-0225-preview", "medgemma-4b-it", "Typhoon-OCR-3B"],
306
  label="Select Model",
307
  value="Lumian-VLR-7B-Thinking"
308
  )