Update app.py
app.py CHANGED
@@ -137,15 +137,23 @@ def generate(
             yield buffer
     else:
         # Text-only input
-
+        # Ensure the chat history alternates between user and assistant roles
+        conversation = []
+        for i, entry in enumerate(chat_history):
+            if i % 2 == 0:
+                conversation.append({"role": "user", "content": entry["content"]})
+            else:
+                conversation.append({"role": "assistant", "content": entry["content"]})
         conversation.append({"role": "user", "content": message})
 
+        # Apply the chat template
         input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
         if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
             input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
             gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
         input_ids = input_ids.to(model.device)
 
+        # Stream the output
         streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
         generate_kwargs = dict(
             {"input_ids": input_ids},
@@ -223,4 +231,4 @@ demo = gr.ChatInterface(
 )
 
 if __name__ == "__main__":
-    demo.queue(max_size=20).launch()
+    demo.queue(max_size=20).launch(share=True)
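For context, the sketch below isolates the role-alternation logic this commit adds. It is illustrative only: build_conversation is a hypothetical helper name, and it assumes chat_history arrives as an ordered list of message dicts, user turn first, each exposing a "content" key, as in Gradio's messages-style history.

# Hypothetical helper mirroring the logic added in this commit (not the Space's own code).
# Assumes chat_history alternates user/assistant and each entry carries a "content" key.
def build_conversation(message: str, chat_history: list[dict]) -> list[dict]:
    conversation = []
    for i, entry in enumerate(chat_history):
        role = "user" if i % 2 == 0 else "assistant"
        conversation.append({"role": role, "content": entry["content"]})
    # Finally append the new user message, as the diff does.
    conversation.append({"role": "user", "content": message})
    return conversation


# Example usage with two prior turns:
history = [{"content": "Hi there"}, {"content": "Hello! How can I help?"}]
print(build_conversation("Tell me a joke", history))

With this shape, apply_chat_template receives a strictly alternating user/assistant sequence, which some chat templates enforce.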
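The templating and truncation step in the unchanged context lines is standard transformers usage. The sketch below is a stand-alone illustration with a placeholder model id and a hard-coded MAX_INPUT_TOKEN_LENGTH in place of the constant the app defines elsewhere; it also omits the gr.Warning the app shows when trimming.

# Illustrative only: placeholder model id and token limit, not the Space's configuration.
from transformers import AutoTokenizer

MAX_INPUT_TOKEN_LENGTH = 4096  # stand-in for the app's constant
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")  # any model with a chat template

conversation = [{"role": "user", "content": "Tell me a joke"}]
input_ids = tokenizer.apply_chat_template(
    conversation, add_generation_prompt=True, return_tensors="pt"
)

# Keep only the most recent tokens when the prompt is too long, as in the diff.
if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
    input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]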
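The streamer and generate_kwargs context lines depend on transformers' TextIteratorStreamer, which only yields text while model.generate runs concurrently in another thread. A hedged sketch of that pattern follows; the model id and generation settings are placeholders, not the ones pinned by this Space.

# Sketch of the streaming pattern the surrounding code uses (placeholder model id).
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "HuggingFaceH4/zephyr-7b-beta"  # placeholder
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

input_ids = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Tell me a joke"}],
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)

streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
generate_kwargs = dict(input_ids=input_ids, streamer=streamer, max_new_tokens=128, do_sample=False)

# Run generation in the background and consume decoded text as it arrives.
thread = Thread(target=model.generate, kwargs=generate_kwargs)
thread.start()

buffer = ""
for text in streamer:
    buffer += text
    print(buffer)  # the app yields buffer to Gradio instead of printing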
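The second hunk only changes the launch call: share=True asks Gradio to create a temporary public *.gradio.live link in addition to the local server when run outside of Spaces. A minimal stand-alone sketch of the queue/launch pattern, with a hypothetical echo() handler standing in for the Space's generate() generator:

# Minimal sketch; echo() is a placeholder handler, not the Space's real one.
import gradio as gr


def echo(message, history):
    return f"You said: {message}"


demo = gr.ChatInterface(fn=echo)

if __name__ == "__main__":
    # max_size bounds how many requests may wait in the queue; share=True adds
    # a temporary public link when running locally, which is what this commit enables.
    demo.queue(max_size=20).launch(share=True)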