prithivMLmods committed on
Commit
4d0dad8
·
verified ·
1 Parent(s): 1b66eea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -14
app.py CHANGED
@@ -90,38 +90,47 @@ def generate(
90
  except Exception as e:
91
  raise ValueError("Unsupported media type. Please upload an image.")
92
 
 
 
 
 
93
  messages = [
94
  {
95
  "role": "user",
96
  "content": [
97
- {
98
- "type": media_type,
99
- media_type: media_path,
100
- },
101
- {"type": "text", "text": message},
102
  ],
103
  }
104
  ]
105
 
106
- text = multimodal_processor.apply_chat_template(
107
- messages, tokenize=False, add_generation_prompt=True
108
- )
109
- image_inputs = multimodal_processor(images=[media_path], return_tensors="pt").to("cuda")
110
  inputs = multimodal_processor(
111
- text=[text],
112
- images=image_inputs,
113
- padding=True,
114
  return_tensors="pt",
 
115
  ).to("cuda")
116
 
 
117
  streamer = TextIteratorStreamer(
118
- multimodal_processor, skip_prompt=True, **{"skip_special_tokens": True}
 
 
 
 
 
 
 
 
 
 
119
  )
120
- generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
121
 
 
122
  thread = Thread(target=multimodal_model.generate, kwargs=generation_kwargs)
123
  thread.start()
124
 
 
125
  buffer = ""
126
  for new_text in streamer:
127
  buffer += new_text
 
90
  except Exception as e:
91
  raise ValueError("Unsupported media type. Please upload an image.")
92
 
93
+ # Load the image
94
+ image = Image.open(media_path).convert("RGB")
95
+
96
+ # Prepare the input for the multimodal model
97
  messages = [
98
  {
99
  "role": "user",
100
  "content": [
101
+ {"image": media_path}, # Pass the image path
102
+ {"text": message}, # Pass the text prompt
 
 
 
103
  ],
104
  }
105
  ]
106
 
107
+ # Process the input
 
 
 
108
  inputs = multimodal_processor(
109
+ messages,
 
 
110
  return_tensors="pt",
111
+ padding=True,
112
  ).to("cuda")
113
 
114
+ # Stream the output
115
  streamer = TextIteratorStreamer(
116
+ multimodal_processor, skip_prompt=True, skip_special_tokens=True
117
+ )
118
+ generation_kwargs = dict(
119
+ inputs,
120
+ streamer=streamer,
121
+ max_new_tokens=max_new_tokens,
122
+ do_sample=True,
123
+ temperature=temperature,
124
+ top_p=top_p,
125
+ top_k=top_k,
126
+ repetition_penalty=repetition_penalty,
127
  )
 
128
 
129
+ # Start the generation in a separate thread
130
  thread = Thread(target=multimodal_model.generate, kwargs=generation_kwargs)
131
  thread.start()
132
 
133
+ # Stream the output token by token
134
  buffer = ""
135
  for new_text in streamer:
136
  buffer += new_text