import asyncio
import os

# Disable Hugging Face Hub telemetry; this must be set before any HF libraries are imported.
os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"

import gradio as gr
from multimodal_module import MultiModalChatModule

# Single shared backend instance; any model loading presumably happens once here at startup.
AI = MultiModalChatModule()

# The module exposes an async API; each Gradio handler below is a thin sync
# wrapper that runs a single call on its own event loop via asyncio.run().

async def chat_async(text, user_id, lang):
    return await AI.generate_response(text, int(user_id), lang)

def chat(text, user_id, lang="en"):
    return asyncio.run(chat_async(text, user_id, lang))

async def voice_async(audio, user_id):
    return await AI.process_voice_message(audio, int(user_id))

def voice(audio, user_id):
    return asyncio.run(voice_async(audio, user_id))

async def image_async(image, user_id):
    return await AI.process_image_message(image, int(user_id))

def image(image, user_id):
    return asyncio.run(image_async(image, user_id))
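
# Note: Gradio also accepts async callables as event handlers, so chat_async,
# voice_async, and image_async could be passed to .click() directly; the sync
# wrappers above are kept as a simple, version-agnostic pattern.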

with gr.Blocks() as demo:
    gr.Markdown("# Multimodal Bot API")
    
    with gr.Tab("Text Chat"):
        t_in = gr.Textbox(label="Message")
        u_in = gr.Textbox(label="User ID", value="123")
        l_in = gr.Textbox(label="Language", value="en")
        t_out = gr.Textbox(label="AI Response")
        gr.Button("Send").click(chat, inputs=[t_in, u_in, l_in], outputs=t_out)
    
    with gr.Tab("Voice"):
        v_in = gr.Audio(sources=["microphone", "upload"], type="filepath")
        vu_in = gr.Textbox(label="User ID", value="123")
        v_out = gr.JSON(label="Voice Analysis")
        gr.Button("Process").click(voice, inputs=[v_in, vu_in], outputs=v_out)
    
    with gr.Tab("Image"):
        i_in = gr.Image(type="filepath")
        iu_in = gr.Textbox(label="User ID", value="123")
        i_out = gr.Textbox(label="Image Description")
        gr.Button("Describe").click(image, inputs=[i_in, iu_in], outputs=i_out)

# Enable the request queue (useful for long-running model calls), then start the server.
demo.queue()
demo.launch()
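
# --- Example client call (a hedged sketch, not part of the app) --------------
# Assuming the app is running on the default local port, the text-chat endpoint
# can be reached with gradio_client. The api_name "/chat" relies on Gradio's
# default of naming endpoints after the handler function; if that differs in
# your Gradio version, fn_index=0 (the first .click() registered above) can be
# used instead.
#
# from gradio_client import Client
# client = Client("http://127.0.0.1:7860")
# print(client.predict("Hello there", "123", "en", api_name="/chat"))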