Spaces:
Running
Running
update
Browse files- eagle_vl/serve/inference.py +11 -4
eagle_vl/serve/inference.py
CHANGED
|
@@ -109,6 +109,8 @@ You understand you are Eagle 2, and may refer to yourself as such when asked."""
|
|
| 109 |
# converstion = get_conv_template(sft_format)
|
| 110 |
# only use the last 3 round of messages
|
| 111 |
# latest_messages = messages[-3:]
|
|
|
|
|
|
|
| 112 |
for mid, message in enumerate(messages):
|
| 113 |
if message["role"] == "user":
|
| 114 |
record = {
|
|
@@ -118,21 +120,24 @@ You understand you are Eagle 2, and may refer to yourself as such when asked."""
|
|
| 118 |
if "images" in message:
|
| 119 |
per_round_images = message["images"]
|
| 120 |
for image in per_round_images:
|
| 121 |
-
|
|
|
|
| 122 |
record["content"].append(
|
| 123 |
{
|
| 124 |
"type": "image",
|
| 125 |
"image": image,
|
| 126 |
}
|
| 127 |
)
|
| 128 |
-
|
|
|
|
| 129 |
record["content"].append(
|
| 130 |
{
|
| 131 |
"type": "image",
|
| 132 |
"image": image,
|
| 133 |
}
|
| 134 |
)
|
| 135 |
-
|
|
|
|
| 136 |
record["content"].append(
|
| 137 |
{
|
| 138 |
"type": "video",
|
|
@@ -140,6 +145,7 @@ You understand you are Eagle 2, and may refer to yourself as such when asked."""
|
|
| 140 |
"nframes": video_nframes,
|
| 141 |
}
|
| 142 |
)
|
|
|
|
| 143 |
if 'content' in message:
|
| 144 |
record["content"].append(
|
| 145 |
{
|
|
@@ -171,7 +177,8 @@ You understand you are Eagle 2, and may refer to yourself as such when asked."""
|
|
| 171 |
assert (
|
| 172 |
formatted_answer.count(processor.image_token) == 0
|
| 173 |
), f"there should be no {processor.image_token} in the assistant's reply, but got {messages}"
|
| 174 |
-
|
|
|
|
| 175 |
# print(f"messages = {results}")
|
| 176 |
text = processor.apply_chat_template(results, add_generation_prompt=False)
|
| 177 |
# print(f"raw text = {text}")
|
|
|
|
| 109 |
# converstion = get_conv_template(sft_format)
|
| 110 |
# only use the last 3 round of messages
|
| 111 |
# latest_messages = messages[-3:]
|
| 112 |
+
|
| 113 |
+
all_images_num = 0
|
| 114 |
for mid, message in enumerate(messages):
|
| 115 |
if message["role"] == "user":
|
| 116 |
record = {
|
|
|
|
| 120 |
if "images" in message:
|
| 121 |
per_round_images = message["images"]
|
| 122 |
for image in per_round_images:
|
| 123 |
+
|
| 124 |
+
if isinstance(image, Image.Image) and all_images_num < 128:
|
| 125 |
record["content"].append(
|
| 126 |
{
|
| 127 |
"type": "image",
|
| 128 |
"image": image,
|
| 129 |
}
|
| 130 |
)
|
| 131 |
+
all_images_num+=1
|
| 132 |
+
elif isinstance(image, str) and image.endswith((".jpeg", ".jpg", ".png", ".gif")) and all_images_num < 128:
|
| 133 |
record["content"].append(
|
| 134 |
{
|
| 135 |
"type": "image",
|
| 136 |
"image": image,
|
| 137 |
}
|
| 138 |
)
|
| 139 |
+
all_images_num+=1
|
| 140 |
+
elif isinstance(image, str) and image.endswith((".mp4", ".mov", ".avi", ".webm")) and all_images_num < 128-video_nframes:
|
| 141 |
record["content"].append(
|
| 142 |
{
|
| 143 |
"type": "video",
|
|
|
|
| 145 |
"nframes": video_nframes,
|
| 146 |
}
|
| 147 |
)
|
| 148 |
+
all_images_num+=video_nframes
|
| 149 |
if 'content' in message:
|
| 150 |
record["content"].append(
|
| 151 |
{
|
|
|
|
| 177 |
assert (
|
| 178 |
formatted_answer.count(processor.image_token) == 0
|
| 179 |
), f"there should be no {processor.image_token} in the assistant's reply, but got {messages}"
|
| 180 |
+
|
| 181 |
+
|
| 182 |
# print(f"messages = {results}")
|
| 183 |
text = processor.apply_chat_template(results, add_generation_prompt=False)
|
| 184 |
# print(f"raw text = {text}")
|