prithivMLmods committed (verified)
Commit 4c19805 · Parent(s): eac47c7

update app

Files changed (1): app.py (+2 -4)
app.py CHANGED

@@ -109,7 +109,7 @@ css = """
 """
 
 MAX_MAX_NEW_TOKENS = 4096
-DEFAULT_MAX_NEW_TOKENS = 2048
+DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -223,9 +223,7 @@ def generate_image(model_name: str, text: str, image: Image.Image,
     messages = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": text}]}]
     prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     inputs = processor(
-        text=[prompt_full], images=[image], return_tensors="pt", padding=True,
-        truncation=True, max_length=MAX_INPUT_TOKEN_LENGTH
-    ).to(device)
+        text=[prompt_full], images=[image], return_tensors="pt", padding=True).to(device)
     streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
     generation_kwargs = {**inputs, "streamer": streamer, "max_new_tokens": max_new_tokens}
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
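
Net effect of the commit: the default generation budget drops from 2048 to 1024 new tokens, and the processor call no longer truncates the prompt to MAX_INPUT_TOKEN_LENGTH (truncating a multimodal prompt can clip the image placeholder tokens the chat template inserts, which may be why it was removed). Below is a minimal, self-contained sketch of the post-commit call path; the model id, AutoModelForVision2Seq, and the explicit length guard are illustrative assumptions, not code from this Space.

# Minimal sketch of the post-commit generation path (assumed context; the model id
# and the length guard below are illustrative, not code from this Space).
import os
from threading import Thread

import torch
from PIL import Image
from transformers import AutoModelForVision2Seq, AutoProcessor, TextIteratorStreamer

MAX_MAX_NEW_TOKENS = 4096
DEFAULT_MAX_NEW_TOKENS = 1024
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

MODEL_ID = "org/some-vision-model"  # placeholder, not the model this app loads
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForVision2Seq.from_pretrained(MODEL_ID).to(device)

def generate_image(text: str, image: Image.Image, max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS):
    messages = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": text}]}]
    prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # Post-commit call: no truncation/max_length, so the processor never clips the prompt.
    inputs = processor(
        text=[prompt_full], images=[image], return_tensors="pt", padding=True).to(device)
    # Optional guard (an assumption, not in the commit): reject oversized prompts instead of
    # truncating them, since MAX_INPUT_TOKEN_LENGTH is no longer enforced by the processor.
    if inputs["input_ids"].shape[1] > MAX_INPUT_TOKEN_LENGTH:
        raise ValueError(f"Prompt exceeds {MAX_INPUT_TOKEN_LENGTH} tokens; shorten the input.")
    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = {**inputs, "streamer": streamer, "max_new_tokens": max_new_tokens}
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    for chunk in streamer:  # stream partial text as generate() produces it
        yield chunk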