prithivMLmods committed on
Commit
1b66eea
·
verified ·
1 Parent(s): 6dee34a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -14
app.py CHANGED
@@ -41,10 +41,9 @@ multimodal_model = Qwen2VLForConditionalGeneration.from_pretrained(
41
  multimodal_processor = AutoProcessor.from_pretrained(MULTIMODAL_MODEL_ID, trust_remote_code=True)
42
 
43
  image_extensions = Image.registered_extensions()
44
- video_extensions = ("avi", "mp4", "mov", "mkv", "flv", "wmv", "mjpeg", "wav", "gif", "webm", "m4v", "3gp")
45
 
46
- def identify_and_save_blob(blob_path):
47
- """Identifies if the blob is an image or video and saves it accordingly."""
48
  try:
49
  with open(blob_path, 'rb') as file:
50
  blob_content = file.read()
@@ -55,9 +54,7 @@ def identify_and_save_blob(blob_path):
55
  extension = ".png" # Default to PNG for saving
56
  media_type = "image"
57
  except (IOError, SyntaxError):
58
- # If it's not a valid image, assume it's a video
59
- extension = ".mp4" # Default to MP4 for saving
60
- media_type = "video"
61
 
62
  # Create a unique filename
63
  filename = f"temp_{uuid.uuid4()}_media{extension}"
@@ -83,17 +80,15 @@ def generate(
83
  files: list = None,
84
  ) -> Iterator[str]:
85
  if files and len(files) > 0:
86
- # Multimodal input
87
  media_path = files[0]
88
  if media_path.endswith(tuple([i for i, f in image_extensions.items()])):
89
  media_type = "image"
90
- elif media_path.endswith(video_extensions):
91
- media_type = "video"
92
  else:
93
  try:
94
- media_path, media_type = identify_and_save_blob(media_path)
95
  except Exception as e:
96
- raise ValueError("Unsupported media type. Please upload an image or video.")
97
 
98
  messages = [
99
  {
@@ -102,7 +97,6 @@ def generate(
102
  {
103
  "type": media_type,
104
  media_type: media_path,
105
- **({"fps": 8.0} if media_type == "video" else {}),
106
  },
107
  {"type": "text", "text": message},
108
  ],
@@ -112,11 +106,10 @@ def generate(
112
  text = multimodal_processor.apply_chat_template(
113
  messages, tokenize=False, add_generation_prompt=True
114
  )
115
- image_inputs, video_inputs = process_vision_info(messages)
116
  inputs = multimodal_processor(
117
  text=[text],
118
  images=image_inputs,
119
- videos=video_inputs,
120
  padding=True,
121
  return_tensors="pt",
122
  ).to("cuda")
 
41
  multimodal_processor = AutoProcessor.from_pretrained(MULTIMODAL_MODEL_ID, trust_remote_code=True)
42
 
43
  image_extensions = Image.registered_extensions()
 
44
 
45
+ def identify_and_save_image(blob_path):
46
+ """Identifies if the blob is an image and saves it accordingly."""
47
  try:
48
  with open(blob_path, 'rb') as file:
49
  blob_content = file.read()
 
54
  extension = ".png" # Default to PNG for saving
55
  media_type = "image"
56
  except (IOError, SyntaxError):
57
+ raise ValueError("Unsupported media type. Please upload an image.")
 
 
58
 
59
  # Create a unique filename
60
  filename = f"temp_{uuid.uuid4()}_media{extension}"
 
80
  files: list = None,
81
  ) -> Iterator[str]:
82
  if files and len(files) > 0:
83
+ # Multimodal input (image only)
84
  media_path = files[0]
85
  if media_path.endswith(tuple([i for i, f in image_extensions.items()])):
86
  media_type = "image"
 
 
87
  else:
88
  try:
89
+ media_path, media_type = identify_and_save_image(media_path)
90
  except Exception as e:
91
+ raise ValueError("Unsupported media type. Please upload an image.")
92
 
93
  messages = [
94
  {
 
97
  {
98
  "type": media_type,
99
  media_type: media_path,
 
100
  },
101
  {"type": "text", "text": message},
102
  ],
 
106
  text = multimodal_processor.apply_chat_template(
107
  messages, tokenize=False, add_generation_prompt=True
108
  )
109
+ image_inputs = multimodal_processor(images=[media_path], return_tensors="pt").to("cuda")
110
  inputs = multimodal_processor(
111
  text=[text],
112
  images=image_inputs,
 
113
  padding=True,
114
  return_tensors="pt",
115
  ).to("cuda")