JuanMa360 committed on
Commit
4bc1572
·
1 Parent(s): fec45e6

multimodal-bot

Browse files
Files changed (1) hide show
  1. app.py +29 -49
app.py CHANGED
@@ -1,58 +1,38 @@
 
 
 
1
  import gradio as gr
2
- import os
3
- import time
4
 
5
def print_like_dislike(x: gr.LikeData):
    """Log a chatbot like/dislike event to stdout.

    Prints the event's ``index``, ``value`` and ``liked`` attributes,
    space-separated, on one line.
    """
    position, content, was_liked = x.index, x.value, x.liked
    print(position, content, was_liked)
 
 
 
 
7
 
8
 
9
def add_text(history, text):
    """Append the user's message to the chat history and lock the textbox.

    Returns a pair: the history extended with ``(text, None)`` (built with
    ``+``, so the list passed in is not modified in place) and a cleared,
    non-interactive ``gr.Textbox``.
    """
    extended_history = history + [(text, None)]
    locked_box = gr.Textbox(value="", interactive=False)
    return extended_history, locked_box
12
 
 
 
13
 
14
def add_file(history, file):
    """Append an uploaded file to the chat history as a new turn.

    The new turn is ``((file.name,), None)``: a one-element tuple holding the
    file's ``name`` attribute, paired with ``None``. The extended history is
    returned as a new list; the list passed in is not modified in place.
    """
    upload_turn = ((file.name,), None)
    return history + [upload_turn]
 
 
 
 
17
 
 
 
18
 
19
def bot(history):
    """Stream a canned reply into the last chat turn, one character at a time.

    The second slot of the final history entry is reset to an empty string
    and then grown by one character per step. After each character the
    function sleeps 0.05 s and yields the same (mutated) ``history`` object.
    """
    response = "**That's cool!**"
    last_turn = history[-1]
    last_turn[1] = ""
    for character in response:
        last_turn[1] = last_turn[1] + character
        time.sleep(0.05)
        yield history
26
 
27
-
28
# Bot avatar image, looked up in the current working directory.
_bot_avatar = os.path.join(os.path.abspath(''), "avatar.png")

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        [],
        elem_id="chatbot",
        bubble_full_width=False,
        avatar_images=(None, _bot_avatar),
    )

    with gr.Row():
        txt = gr.Textbox(
            scale=4,
            show_label=False,
            placeholder="Enter text and press enter, or upload an image",
            container=False,
        )
        btn = gr.UploadButton("📁", file_types=["image", "video", "audio"])

    # Text path: record the message (and lock the textbox), then stream the
    # bot reply, then make the textbox interactive again.
    _text_added = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False)
    txt_msg = _text_added.then(bot, chatbot, chatbot, api_name="bot_response")
    txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)

    # Upload path: record the file in the history, then stream the bot reply.
    _file_added = btn.upload(add_file, [chatbot, btn], [chatbot], queue=False)
    file_msg = _file_added.then(bot, chatbot, chatbot)

    chatbot.like(print_like_dislike, None, None)


demo.queue()
if __name__ == "__main__":
    demo.launch()
 
1
+ import torch
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ from PIL import Image
4
  import gradio as gr
 
 
5
 
6
# Hugging Face Hub repository used for both the model and its tokenizer.
MODEL_ID = "MILVLG/imp-v1-3b"

# trust_remote_code=True is passed to both loaders, which allows code shipped
# in the model repository to be executed while loading.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
12
 
13
 
14
def generate_answer(text, image):
    """Generate the model's answer to a text prompt about an image.

    Args:
        text: Prompt string; it is tokenized exactly as given.
        image: Image supplied by the Gradio image input. A
            ``PIL.Image.Image`` is used directly; any other value (for
            example a NumPy array) is converted with ``Image.fromarray``.

    Returns:
        The decoded continuation only: the prompt tokens are sliced off,
        special tokens are skipped and surrounding whitespace is stripped.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Fail with a readable message instead of an opaque error from
        # inside the image preprocessing call.
        raise gr.Error("Please upload an image before submitting.")

    if not isinstance(image, Image.Image):
        # NOTE(review): the model card example passes a PIL image to
        # image_preprocess; normalise other inputs to match that usage.
        image = Image.fromarray(image)

    # NOTE(review): the model card wraps questions in a
    # "USER: <image>\n... ASSISTANT:" prompt template; `text` is passed
    # through unchanged here -- confirm that callers supply the template.

    # The model is loaded with device_map="auto", so it may not live on the
    # CPU where the tokenizer creates its tensors; move the prompt over.
    input_ids = tokenizer(text, return_tensors='pt').input_ids.to(model.device)
    image_tensor = model.image_preprocess(image)

    output_ids = model.generate(
        input_ids,
        max_new_tokens=100,
        images=image_tensor,
        use_cache=True)[0]

    # Drop the echoed prompt tokens so only newly generated text is returned.
    new_tokens = output_ids[input_ids.shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
26
 
27
# Input widgets: a five-line prompt box and an image upload.
text_input = gr.Textbox(label="Enter text", lines=5)
image_input = gr.Image(label="Upload Image", shape=(224, 224))

# Route both inputs to generate_answer and show its return value as text.
iface = gr.Interface(
    title="DD360-Bot-Multimodal",
    description="Enter text and upload an image to receive a response from the chatbot.",
    fn=generate_answer,
    inputs=[text_input, image_input],
    outputs="text",
)

# Launch unconditionally when this module is executed.
iface.launch()