Spaces:

DHEIVER
/

Qwen2.5VL7BInstruct

Runtime error

App Files Files Community

prithivMLmods committed on Jan 9

Commit

1b66eea

verified ·

1 Parent(s): 6dee34a

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -14

app.py CHANGED Viewed

@@ -41,10 +41,9 @@ multimodal_model = Qwen2VLForConditionalGeneration.from_pretrained(
 multimodal_processor = AutoProcessor.from_pretrained(MULTIMODAL_MODEL_ID, trust_remote_code=True)
 image_extensions = Image.registered_extensions()
-video_extensions = ("avi", "mp4", "mov", "mkv", "flv", "wmv", "mjpeg", "wav", "gif", "webm", "m4v", "3gp")
-def identify_and_save_blob(blob_path):
-    """Identifies if the blob is an image or video and saves it accordingly."""
     try:
         with open(blob_path, 'rb') as file:
             blob_content = file.read()
@@ -55,9 +54,7 @@ def identify_and_save_blob(blob_path):
                 extension = ".png"  # Default to PNG for saving
                 media_type = "image"
             except (IOError, SyntaxError):
-                # If it's not a valid image, assume it's a video
-                extension = ".mp4"  # Default to MP4 for saving
-                media_type = "video"
             # Create a unique filename
             filename = f"temp_{uuid.uuid4()}_media{extension}"
@@ -83,17 +80,15 @@ def generate(
     files: list = None,
 ) -> Iterator[str]:
     if files and len(files) > 0:
-        # Multimodal input
         media_path = files[0]
         if media_path.endswith(tuple([i for i, f in image_extensions.items()])):
             media_type = "image"
-        elif media_path.endswith(video_extensions):
-            media_type = "video"
         else:
             try:
-                media_path, media_type = identify_and_save_blob(media_path)
             except Exception as e:
-                raise ValueError("Unsupported media type. Please upload an image or video.")
         messages = [
             {
@@ -102,7 +97,6 @@ def generate(
                     {
                         "type": media_type,
                         media_type: media_path,
-                        **({"fps": 8.0} if media_type == "video" else {}),
                     },
                     {"type": "text", "text": message},
                 ],
@@ -112,11 +106,10 @@ def generate(
         text = multimodal_processor.apply_chat_template(
             messages, tokenize=False, add_generation_prompt=True
         )
-        image_inputs, video_inputs = process_vision_info(messages)
         inputs = multimodal_processor(
             text=[text],
             images=image_inputs,
-            videos=video_inputs,
             padding=True,
             return_tensors="pt",
         ).to("cuda")

 multimodal_processor = AutoProcessor.from_pretrained(MULTIMODAL_MODEL_ID, trust_remote_code=True)
 image_extensions = Image.registered_extensions()
+def identify_and_save_image(blob_path):
+    """Identifies if the blob is an image and saves it accordingly."""
     try:
         with open(blob_path, 'rb') as file:
             blob_content = file.read()
                 extension = ".png"  # Default to PNG for saving
                 media_type = "image"
             except (IOError, SyntaxError):
+                raise ValueError("Unsupported media type. Please upload an image.")
             # Create a unique filename
             filename = f"temp_{uuid.uuid4()}_media{extension}"
     files: list = None,
 ) -> Iterator[str]:
     if files and len(files) > 0:
+        # Multimodal input (image only)
         media_path = files[0]
         if media_path.endswith(tuple([i for i, f in image_extensions.items()])):
             media_type = "image"
         else:
             try:
+                media_path, media_type = identify_and_save_image(media_path)
             except Exception as e:
+                raise ValueError("Unsupported media type. Please upload an image.")
         messages = [
             {
                     {
                         "type": media_type,
                         media_type: media_path,
                     },
                     {"type": "text", "text": message},
                 ],
         text = multimodal_processor.apply_chat_template(
             messages, tokenize=False, add_generation_prompt=True
         )
+        image_inputs = multimodal_processor(images=[media_path], return_tensors="pt").to("cuda")
         inputs = multimodal_processor(
             text=[text],
             images=image_inputs,
             padding=True,
             return_tensors="pt",
         ).to("cuda")