Spaces:
Sleeping
Sleeping
Update vlm.py
Browse files
vlm.py
CHANGED
@@ -49,39 +49,21 @@ def encode_image(image_path):
|
|
49 |
#
|
50 |
# Build messages
|
51 |
#
|
52 |
-
def build_messages(message: dict, history: list[tuple]):
|
53 |
"""Build messages given message & history from a **multimodal** chat interface.
|
54 |
Args:
|
55 |
message: dictionary with keys: 'text', 'files'
|
56 |
-
history: list of tuples
|
57 |
|
58 |
Returns:
|
59 |
list of messages (to be sent to the model)
|
60 |
"""
|
61 |
logger.info(f"{message=}")
|
62 |
logger.info(f"{history=}")
|
|
|
63 |
# Get the user's text and list of images
|
64 |
user_text = message.get("text", "")
|
65 |
user_images = message.get("files", []) # List of images
|
66 |
-
|
67 |
-
# Build the message list including history
|
68 |
-
messages = []
|
69 |
-
combined_user_input = [] # Combine images and text if found in same turn.
|
70 |
-
for user_turn, bot_turn in history:
|
71 |
-
if isinstance(user_turn, tuple): # Image input
|
72 |
-
image_content = [
|
73 |
-
{
|
74 |
-
"type": "image_url",
|
75 |
-
"image_url": f"data:image/jpeg;base64,{encode_image(image)}"
|
76 |
-
} for image in user_turn
|
77 |
-
]
|
78 |
-
combined_user_input.extend(image_content)
|
79 |
-
elif isinstance(user_turn, str): # Text input
|
80 |
-
combined_user_input.append({"type": "text", "text": user_turn})
|
81 |
-
if combined_user_input and bot_turn:
|
82 |
-
messages.append({'role': 'user', 'content': combined_user_input})
|
83 |
-
messages.append({'role': 'assistant', 'content': [{"type": "text", "text": bot_turn}]})
|
84 |
-
combined_user_input = [] #reset the combined user input.
|
85 |
|
86 |
# Build the user message's content from the provided message
|
87 |
user_content = []
|
@@ -94,7 +76,9 @@ def build_messages(message: dict, history: list[tuple]):
|
|
94 |
"image_url": f"data:image/jpeg;base64,{encode_image(image)}"
|
95 |
}
|
96 |
)
|
97 |
-
|
|
|
|
|
98 |
messages.append({'role': 'user', 'content': user_content})
|
99 |
logger.info(f"{messages=}")
|
100 |
|
|
|
49 |
#
|
50 |
# Build messages
|
51 |
#
|
52 |
+
def build_messages(message: dict, history: list[dict]):
|
53 |
"""Build messages given message & history from a **multimodal** chat interface.
|
54 |
Args:
|
55 |
message: dictionary with keys: 'text', 'files'
|
56 |
+
history: list of dictionaries
|
57 |
|
58 |
Returns:
|
59 |
list of messages (to be sent to the model)
|
60 |
"""
|
61 |
logger.info(f"{message=}")
|
62 |
logger.info(f"{history=}")
|
63 |
+
|
64 |
# Get the user's text and list of images
|
65 |
user_text = message.get("text", "")
|
66 |
user_images = message.get("files", []) # List of images
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
|
68 |
# Build the user message's content from the provided message
|
69 |
user_content = []
|
|
|
76 |
"image_url": f"data:image/jpeg;base64,{encode_image(image)}"
|
77 |
}
|
78 |
)
|
79 |
+
|
80 |
+
# Append to the history to create the new messages
|
81 |
+
messages = history
|
82 |
messages.append({'role': 'user', 'content': user_content})
|
83 |
logger.info(f"{messages=}")
|
84 |
|