Spaces:
Runtime error
Runtime error
Commit
·
5011842
1
Parent(s):
ec50e73
looots of fixes
Browse files- app_dialogue.py +103 -121
app_dialogue.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
import copy
|
| 2 |
import hashlib
|
| 3 |
-
import logging
|
| 4 |
import os
|
| 5 |
import re
|
| 6 |
import torch
|
|
@@ -15,7 +14,7 @@ from PIL import Image
|
|
| 15 |
import gradio as gr
|
| 16 |
from gradio import processing_utils
|
| 17 |
from gradio_client.client import DEFAULT_TEMP_DIR
|
| 18 |
-
from transformers import AutoProcessor, AutoModelForCausalLM, TextIteratorStreamer
|
| 19 |
|
| 20 |
from utils import create_model_inputs
|
| 21 |
|
|
@@ -27,18 +26,16 @@ MODELS = {
|
|
| 27 |
trust_remote_code=True,
|
| 28 |
torch_dtype=torch.bfloat16,
|
| 29 |
token=os.environ["HF_AUTH_TOKEN"],
|
| 30 |
-
|
| 31 |
).to(DEVICE),
|
| 32 |
"HuggingFaceM4/idefics2": AutoModelForCausalLM.from_pretrained(
|
| 33 |
"HuggingFaceM4/idefics2",
|
| 34 |
trust_remote_code=True,
|
| 35 |
torch_dtype=torch.bfloat16,
|
| 36 |
token=os.environ["HF_AUTH_TOKEN"],
|
| 37 |
-
|
| 38 |
).to(DEVICE),
|
| 39 |
}
|
| 40 |
-
|
| 41 |
-
|
| 42 |
PROCESSOR = AutoProcessor.from_pretrained(
|
| 43 |
"HuggingFaceM4/idefics2",
|
| 44 |
token=os.environ["HF_AUTH_TOKEN"],
|
|
@@ -75,24 +72,10 @@ SYSTEM_PROMPT = [
|
|
| 75 |
# """\nAssistant: There is no dogs in this image. The picture shows a tennis player jumping to volley the ball.<end_of_utterance>""",
|
| 76 |
]
|
| 77 |
|
| 78 |
-
BAN_TOKENS = ( # For documentation puporse. We are not using this list, it is hardcoded inside `idefics_causal_lm.py` inside TGI.
|
| 79 |
-
"<image>;<fake_token_around_image>"
|
| 80 |
-
)
|
| 81 |
-
STOP_SUSPECT_LIST = []
|
| 82 |
-
|
| 83 |
API_TOKEN = os.getenv("HF_AUTH_TOKEN")
|
| 84 |
# IDEFICS_LOGO = "https://huggingface.co/spaces/HuggingFaceM4/idefics_playground/resolve/main/IDEFICS_logo.png"
|
| 85 |
-
|
| 86 |
-
PROCESSOR = AutoProcessor.from_pretrained(
|
| 87 |
-
"HuggingFaceM4/idefics-9b-instruct",
|
| 88 |
-
token=API_TOKEN,
|
| 89 |
-
)
|
| 90 |
-
|
| 91 |
BOT_AVATAR = "IDEFICS_logo.png"
|
| 92 |
|
| 93 |
-
logging.basicConfig(level=logging.INFO)
|
| 94 |
-
logger = logging.getLogger()
|
| 95 |
-
|
| 96 |
|
| 97 |
# Monkey patch adapted from gradio.components.image.Image - mostly to make the `save` step optional in `pil_to_temp_file`
|
| 98 |
def hash_bytes(bytes: bytes):
|
|
@@ -247,6 +230,25 @@ def prompt_list_to_markdown(prompt_list: List[str]) -> str:
|
|
| 247 |
resulting_string += elem
|
| 248 |
return resulting_string
|
| 249 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
def remove_spaces_around_token(text: str) -> str:
|
| 251 |
pattern = r"\s*(<fake_token_around_image>)\s*"
|
| 252 |
replacement = r"\1"
|
|
@@ -482,17 +484,7 @@ with gr.Blocks(title="IDEFICS Playground", theme=gr.themes.Base()) as demo:
|
|
| 482 |
)
|
| 483 |
|
| 484 |
# Creating model inputs
|
| 485 |
-
images =
|
| 486 |
-
for idx, part in enumerate(formated_prompt_list):
|
| 487 |
-
if is_image(part):
|
| 488 |
-
if is_url(part):
|
| 489 |
-
images.append(fetch_images([part])[0])
|
| 490 |
-
else:
|
| 491 |
-
images.append(Image.open(part))
|
| 492 |
-
formated_prompt_list[idx] = f"{FAKE_TOK_AROUND_IMAGE}{'<image>' * IMAGE_SEQ_LEN}{FAKE_TOK_AROUND_IMAGE}"
|
| 493 |
-
input_text = "".join(formated_prompt_list)
|
| 494 |
-
input_text = input_text.replace(FAKE_TOK_AROUND_IMAGE * 2, FAKE_TOK_AROUND_IMAGE)
|
| 495 |
-
input_text = BOS_TOKEN + input_text
|
| 496 |
inputs = create_model_inputs([input_text], [images])
|
| 497 |
inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
|
| 498 |
generation_args.update(inputs)
|
|
@@ -558,17 +550,7 @@ with gr.Blocks(title="IDEFICS Playground", theme=gr.themes.Base()) as demo:
|
|
| 558 |
)
|
| 559 |
|
| 560 |
# Creating model inputs
|
| 561 |
-
images =
|
| 562 |
-
for idx, part in enumerate(formated_prompt_list):
|
| 563 |
-
if is_image(part):
|
| 564 |
-
if is_url(part):
|
| 565 |
-
images.append(fetch_images([part])[0])
|
| 566 |
-
else:
|
| 567 |
-
images.append(Image.open(part))
|
| 568 |
-
formated_prompt_list[idx] = f"{FAKE_TOK_AROUND_IMAGE}{'<image>' * IMAGE_SEQ_LEN}{FAKE_TOK_AROUND_IMAGE}"
|
| 569 |
-
input_text = "".join(formated_prompt_list)
|
| 570 |
-
input_text = input_text.replace(FAKE_TOK_AROUND_IMAGE * 2, FAKE_TOK_AROUND_IMAGE)
|
| 571 |
-
input_text = BOS_TOKEN + input_text
|
| 572 |
inputs = create_model_inputs([input_text], [images])
|
| 573 |
inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
|
| 574 |
generation_args.update(inputs)
|
|
@@ -653,85 +635,85 @@ with gr.Blocks(title="IDEFICS Playground", theme=gr.themes.Base()) as demo:
|
|
| 653 |
textbox.submit(lambda : gr.update(label='📁 Upload image', interactive=True), [], upload_btn)
|
| 654 |
clear_btn.click(lambda : gr.update(label='📁 Upload image', interactive=True), [], upload_btn)
|
| 655 |
|
| 656 |
-
examples_path = os.path.dirname(__file__)
|
| 657 |
-
gr.Examples(
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
|
| 667 |
-
|
| 668 |
-
|
| 669 |
-
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
|
| 673 |
-
|
| 674 |
-
|
| 675 |
-
|
| 676 |
-
|
| 677 |
-
|
| 678 |
-
|
| 679 |
-
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
|
| 687 |
-
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
|
| 698 |
-
|
| 699 |
-
|
| 700 |
-
|
| 701 |
-
|
| 702 |
-
|
| 703 |
-
|
| 704 |
-
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
|
| 712 |
-
|
| 713 |
-
|
| 714 |
-
|
| 715 |
-
|
| 716 |
-
|
| 717 |
-
|
| 718 |
-
|
| 719 |
-
|
| 720 |
-
|
| 721 |
-
|
| 722 |
-
|
| 723 |
-
|
| 724 |
-
|
| 725 |
-
|
| 726 |
-
|
| 727 |
-
|
| 728 |
-
|
| 729 |
-
|
| 730 |
-
|
| 731 |
-
|
| 732 |
-
|
| 733 |
-
|
| 734 |
-
)
|
| 735 |
|
| 736 |
demo.queue(max_size=40)
|
| 737 |
demo.launch()
|
|
|
|
| 1 |
import copy
|
| 2 |
import hashlib
|
|
|
|
| 3 |
import os
|
| 4 |
import re
|
| 5 |
import torch
|
|
|
|
| 14 |
import gradio as gr
|
| 15 |
from gradio import processing_utils
|
| 16 |
from gradio_client.client import DEFAULT_TEMP_DIR
|
| 17 |
+
from transformers import AutoProcessor, AutoModelForCausalLM, TextIteratorStreamer, logging
|
| 18 |
|
| 19 |
from utils import create_model_inputs
|
| 20 |
|
|
|
|
| 26 |
trust_remote_code=True,
|
| 27 |
torch_dtype=torch.bfloat16,
|
| 28 |
token=os.environ["HF_AUTH_TOKEN"],
|
| 29 |
+
revision="1e05755c1c5cb2077a0f60b83ea1368c22a17282",
|
| 30 |
).to(DEVICE),
|
| 31 |
"HuggingFaceM4/idefics2": AutoModelForCausalLM.from_pretrained(
|
| 32 |
"HuggingFaceM4/idefics2",
|
| 33 |
trust_remote_code=True,
|
| 34 |
torch_dtype=torch.bfloat16,
|
| 35 |
token=os.environ["HF_AUTH_TOKEN"],
|
| 36 |
+
revision="5cd3c3a3eb5e0ea664f5ac09e73c9ef42da93a86",
|
| 37 |
).to(DEVICE),
|
| 38 |
}
|
|
|
|
|
|
|
| 39 |
PROCESSOR = AutoProcessor.from_pretrained(
|
| 40 |
"HuggingFaceM4/idefics2",
|
| 41 |
token=os.environ["HF_AUTH_TOKEN"],
|
|
|
|
| 72 |
# """\nAssistant: There is no dogs in this image. The picture shows a tennis player jumping to volley the ball.<end_of_utterance>""",
|
| 73 |
]
|
| 74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
API_TOKEN = os.getenv("HF_AUTH_TOKEN")
|
| 76 |
# IDEFICS_LOGO = "https://huggingface.co/spaces/HuggingFaceM4/idefics_playground/resolve/main/IDEFICS_logo.png"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
BOT_AVATAR = "IDEFICS_logo.png"
|
| 78 |
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
# Monkey patch adapted from gradio.components.image.Image - mostly to make the `save` step optional in `pil_to_temp_file`
|
| 81 |
def hash_bytes(bytes: bytes):
|
|
|
|
| 230 |
resulting_string += elem
|
| 231 |
return resulting_string
|
| 232 |
|
| 233 |
+
|
| 234 |
+
def prompt_list_to_model_input(prompt_list: List[str]) -> Tuple[str, List[Image.Image]]:
    """
    Build the prompt string and the image list that are handed to the model's processor.

    Each entry of `prompt_list` for which `is_image` is true is loaded: through
    `fetch_images` when `is_url` reports a URL, through `Image.open` otherwise.
    Its slot in `prompt_list` is then overwritten with the image placeholder
    sequence. NOTE: the overwrite happens in place, so the caller's list is modified.

    Returns a tuple of the BOS-prefixed, whitespace-stripped prompt string and the
    loaded images, in the order they appear in `prompt_list`.
    """
    # Placeholder text substituted for every image entry.
    image_placeholder = f"{FAKE_TOK_AROUND_IMAGE}{'<image>' * IMAGE_SEQ_LEN}{FAKE_TOK_AROUND_IMAGE}"

    loaded_images = []
    for position, chunk in enumerate(prompt_list):
        if not is_image(chunk):
            continue
        loaded = fetch_images([chunk])[0] if is_url(chunk) else Image.open(chunk)
        loaded_images.append(loaded)
        prompt_list[position] = image_placeholder

    # Collapse any doubled boundary token (e.g. from adjacent image placeholders) into a single one.
    joined = "".join(prompt_list).replace(FAKE_TOK_AROUND_IMAGE * 2, FAKE_TOK_AROUND_IMAGE)
    return BOS_TOKEN + joined.strip(), loaded_images
|
| 250 |
+
|
| 251 |
+
|
| 252 |
def remove_spaces_around_token(text: str) -> str:
|
| 253 |
pattern = r"\s*(<fake_token_around_image>)\s*"
|
| 254 |
replacement = r"\1"
|
|
|
|
| 484 |
)
|
| 485 |
|
| 486 |
# Creating model inputs
|
| 487 |
+
input_text, images = prompt_list_to_model_input(formated_prompt_list)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 488 |
inputs = create_model_inputs([input_text], [images])
|
| 489 |
inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
|
| 490 |
generation_args.update(inputs)
|
|
|
|
| 550 |
)
|
| 551 |
|
| 552 |
# Creating model inputs
|
| 553 |
+
input_text, images = prompt_list_to_model_input(formated_prompt_list)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 554 |
inputs = create_model_inputs([input_text], [images])
|
| 555 |
inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
|
| 556 |
generation_args.update(inputs)
|
|
|
|
| 635 |
textbox.submit(lambda : gr.update(label='📁 Upload image', interactive=True), [], upload_btn)
|
| 636 |
clear_btn.click(lambda : gr.update(label='📁 Upload image', interactive=True), [], upload_btn)
|
| 637 |
|
| 638 |
+
# examples_path = os.path.dirname(__file__)
|
| 639 |
+
# gr.Examples(
|
| 640 |
+
# examples=[
|
| 641 |
+
# [
|
| 642 |
+
# (
|
| 643 |
+
# "Which famous person does the person in the image look like? Could you craft an engaging narrative"
|
| 644 |
+
# " featuring this character from the image as the main protagonist?"
|
| 645 |
+
# ),
|
| 646 |
+
# f"{examples_path}/example_images/obama-harry-potter.jpg",
|
| 647 |
+
# ],
|
| 648 |
+
# [
|
| 649 |
+
# "Can you describe the image? Do you think it's real?",
|
| 650 |
+
# f"{examples_path}/example_images/rabbit_force.png",
|
| 651 |
+
# ],
|
| 652 |
+
# ["Explain this meme to me.", f"{examples_path}/example_images/meme_french.jpg"],
|
| 653 |
+
# ["Give me a short and easy recipe for this dish.", f"{examples_path}/example_images/recipe_burger.webp"],
|
| 654 |
+
# [
|
| 655 |
+
# "I want to go somewhere similar to the one in the photo. Give me destinations and travel tips.",
|
| 656 |
+
# f"{examples_path}/example_images/travel_tips.jpg",
|
| 657 |
+
# ],
|
| 658 |
+
# [
|
| 659 |
+
# "Can you name the characters in the image and give their French names?",
|
| 660 |
+
# f"{examples_path}/example_images/gaulois.png",
|
| 661 |
+
# ],
|
| 662 |
+
# ["Write a complete sales ad for this product.", f"{examples_path}/example_images/product_ad.jpg"],
|
| 663 |
+
# [
|
| 664 |
+
# (
|
| 665 |
+
# "As an art critic AI assistant, could you describe this painting in details and make a thorough"
|
| 666 |
+
# " critic?"
|
| 667 |
+
# ),
|
| 668 |
+
# f"{examples_path}/example_images/art_critic.png",
|
| 669 |
+
# ],
|
| 670 |
+
# [
|
| 671 |
+
# "Can you tell me a very short story based on this image?",
|
| 672 |
+
# f"{examples_path}/example_images/chicken_on_money.png",
|
| 673 |
+
# ],
|
| 674 |
+
# ["Write 3 funny meme texts about this image.", f"{examples_path}/example_images/elon_smoking.jpg"],
|
| 675 |
+
# [
|
| 676 |
+
# "Who is in this picture? Why do people find it surprising?",
|
| 677 |
+
# f"{examples_path}/example_images/pope_doudoune.webp",
|
| 678 |
+
# ],
|
| 679 |
+
# ["What are the armed baguettes guarding?", f"{examples_path}/example_images/baguettes_guarding_paris.png"],
|
| 680 |
+
# ["What is this animal and why is it unusual?", f"{examples_path}/example_images/blue_dog.png"],
|
| 681 |
+
# [
|
| 682 |
+
# "What is this object and do you think it is horrifying?",
|
| 683 |
+
# f"{examples_path}/example_images/can_horror.png",
|
| 684 |
+
# ],
|
| 685 |
+
# [
|
| 686 |
+
# (
|
| 687 |
+
# "What is this sketch for? How would you make an argument to prove this sketch was made by Picasso"
|
| 688 |
+
# " himself?"
|
| 689 |
+
# ),
|
| 690 |
+
# f"{examples_path}/example_images/cat_sketch.png",
|
| 691 |
+
# ],
|
| 692 |
+
# ["Which celebrity does this claymation figure look like?", f"{examples_path}/example_images/kanye.jpg"],
|
| 693 |
+
# ["What can you tell me about the cap in this image?", f"{examples_path}/example_images/ironman_cap.png"],
|
| 694 |
+
# [
|
| 695 |
+
# "Can you write an advertisement for Coca-Cola based on this image?",
|
| 696 |
+
# f"{examples_path}/example_images/polar_bear_coke.png",
|
| 697 |
+
# ],
|
| 698 |
+
# [
|
| 699 |
+
# "What is happening in this image? Which famous personality does this person in center looks like?",
|
| 700 |
+
# f"{examples_path}/example_images/gandhi_selfie.jpg",
|
| 701 |
+
# ],
|
| 702 |
+
# [
|
| 703 |
+
# "What do you think the dog is doing and is it unusual?",
|
| 704 |
+
# f"{examples_path}/example_images/surfing_dog.jpg",
|
| 705 |
+
# ],
|
| 706 |
+
# ],
|
| 707 |
+
# inputs=[textbox, imagebox],
|
| 708 |
+
# outputs=[textbox, imagebox, chatbot],
|
| 709 |
+
# fn=process_example,
|
| 710 |
+
# cache_examples=False,
|
| 711 |
+
# examples_per_page=6,
|
| 712 |
+
# label=(
|
| 713 |
+
# "Click on any example below to get started.\nFor convenience, the model generations have been"
|
| 714 |
+
# " pre-computed with `idefics-80b-instruct`."
|
| 715 |
+
# ),
|
| 716 |
+
# )
|
| 717 |
|
| 718 |
demo.queue(max_size=40)
|
| 719 |
demo.launch()
|