Commit
·
7e5f599
1
Parent(s):
f16e094
update
Browse files
models/mllava/processing_llava.py
CHANGED
|
@@ -122,6 +122,10 @@ class MLlavaProcessor(ProcessorMixin):
|
|
| 122 |
# prepend empty image tokens to text
|
| 123 |
if "USER:" in t:
|
| 124 |
t = t.replace("USER:", "USER:" + "<image>" * (num_images - num_image_tokens), 1)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
else:
|
| 126 |
t = "<image>" * (num_images - num_image_tokens) + t
|
| 127 |
# logger.warning("Image Tokens <image> are not provided in the text. Automatically prepending them before the text. This might cause model to behave unexpectedly.")
|
|
@@ -133,6 +137,7 @@ class MLlavaProcessor(ProcessorMixin):
|
|
| 133 |
t = "".join(t)
|
| 134 |
logger.warning("Number of <image> tokens exceeds number of images. Automatically removing extra tokens at the end of the text.")
|
| 135 |
# raise ValueError("Invalid input text. Number of <image> tokens exceeds number of images.")
|
|
|
|
| 136 |
texts = text
|
| 137 |
else:
|
| 138 |
raise ValueError("Invalid input text. text must be a string or a list of strings.")
|
|
|
|
| 122 |
# prepend empty image tokens to text
|
| 123 |
if "USER:" in t:
|
| 124 |
t = t.replace("USER:", "USER:" + "<image>" * (num_images - num_image_tokens), 1)
|
| 125 |
+
elif "Human:" in t:
|
| 126 |
+
t = t.replace("Human:", "Human:" + "<image>" * (num_images - num_image_tokens), 1)
|
| 127 |
+
elif "HUMAN:" in t:
|
| 128 |
+
t = t.replace("HUMAN:", "HUMAN:" + "<image>" * (num_images - num_image_tokens), 1)
|
| 129 |
else:
|
| 130 |
t = "<image>" * (num_images - num_image_tokens) + t
|
| 131 |
# logger.warning("Image Tokens <image> are not provided in the text. Automatically prepending them before the text. This might cause model to behave unexpectedly.")
|
|
|
|
| 137 |
t = "".join(t)
|
| 138 |
logger.warning("Number of <image> tokens exceeds number of images. Automatically removing extra tokens at the end of the text.")
|
| 139 |
# raise ValueError("Invalid input text. Number of <image> tokens exceeds number of images.")
|
| 140 |
+
text[i] = t
|
| 141 |
texts = text
|
| 142 |
else:
|
| 143 |
raise ValueError("Invalid input text. text must be a string or a list of strings.")
|