Update app.py
app.py CHANGED
@@ -41,10 +41,9 @@ multimodal_model = Qwen2VLForConditionalGeneration.from_pretrained(
 multimodal_processor = AutoProcessor.from_pretrained(MULTIMODAL_MODEL_ID, trust_remote_code=True)
 
 image_extensions = Image.registered_extensions()
-video_extensions = ("avi", "mp4", "mov", "mkv", "flv", "wmv", "mjpeg", "wav", "gif", "webm", "m4v", "3gp")
 
-def identify_and_save_blob(blob_path):
-    """Identifies if the blob is an image or video and saves it accordingly."""
+def identify_and_save_image(blob_path):
+    """Identifies if the blob is an image and saves it accordingly."""
     try:
         with open(blob_path, 'rb') as file:
             blob_content = file.read()
@@ -55,9 +54,7 @@ def identify_and_save_blob(blob_path):
                 extension = ".png"  # Default to PNG for saving
                 media_type = "image"
             except (IOError, SyntaxError):
-                # If it's not a valid image, assume it's a video
-                extension = ".mp4"  # Default to MP4 for saving
-                media_type = "video"
+                raise ValueError("Unsupported media type. Please upload an image.")
 
             # Create a unique filename
             filename = f"temp_{uuid.uuid4()}_media{extension}"
@@ -83,17 +80,15 @@ def generate(
     files: list = None,
 ) -> Iterator[str]:
     if files and len(files) > 0:
-        # Multimodal input
+        # Multimodal input (image only)
         media_path = files[0]
         if media_path.endswith(tuple([i for i, f in image_extensions.items()])):
             media_type = "image"
-        elif media_path.endswith(video_extensions):
-            media_type = "video"
         else:
             try:
-                media_path, media_type = identify_and_save_blob(media_path)
+                media_path, media_type = identify_and_save_image(media_path)
             except Exception as e:
-                raise ValueError("Unsupported media type. Please upload an image or video.")
+                raise ValueError("Unsupported media type. Please upload an image.")
 
         messages = [
             {
@@ -102,7 +97,6 @@ def generate(
                     {
                         "type": media_type,
                         media_type: media_path,
-                        **({"fps": 8.0} if media_type == "video" else {}),
                     },
                     {"type": "text", "text": message},
                 ],
@@ -112,11 +106,10 @@ def generate(
         text = multimodal_processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
         )
-        image_inputs, video_inputs = process_vision_info(messages)
+        image_inputs = multimodal_processor(images=[media_path], return_tensors="pt").to("cuda")
         inputs = multimodal_processor(
             text=[text],
             images=image_inputs,
-            videos=video_inputs,
             padding=True,
             return_tensors="pt",
         ).to("cuda")
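For context, the usual Qwen2-VL flow prepares the vision inputs once and hands them to the processor in a single call: either via qwen_vl_utils.process_vision_info(messages), as on the removed line, or by opening the uploaded file as a PIL image and passing it directly through the images= argument, rather than pre-encoding it with the processor and feeding the result back in as image_inputs. Below is a minimal sketch of that single-call pattern, not part of this commit; it assumes the media_path, messages, and multimodal_processor names defined in this app.py and the PIL Image import the file already uses.

# A minimal sketch (assumption: image-only input, names taken from the app above).
from PIL import Image

image = Image.open(media_path).convert("RGB")  # load the uploaded file as a PIL image

text = multimodal_processor.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
inputs = multimodal_processor(
    text=[text],
    images=[image],  # raw PIL image(s); the processor handles resizing and normalization
    padding=True,
    return_tensors="pt",
).to("cuda")

The resulting inputs dict can then be passed to the generation call exactly as the existing code does with its inputs variable.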