# app.py — Gradio front-end for MultiModalChatModule
# (removed Hugging Face web-page chrome that had been pasted above the code)
import os

# Quiet HF Hub telemetry and tokenizer fork warnings. These flags are read
# when the ML stack is imported/initialized, so they must be set BEFORE
# `multimodal_module` is imported below — in the previous revision they were
# set after `MultiModalChatModule()` was constructed and had no effect.
os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import asyncio

import gradio as gr

from multimodal_module import MultiModalChatModule

# Single shared module instance backing every tab's callbacks.
mm = MultiModalChatModule()
async def async_wrapper(fn, *args):
    """Await the coroutine function *fn* called with *args* and return its result.

    Bridges an async API into synchronous Gradio callbacks, which drive
    this coroutine via ``asyncio.run``.
    """
    result = await fn(*args)
    return result
def process_voice(audio, user_id):
    """Synchronous Gradio callback: run async voice analysis for *user_id*."""
    uid = int(user_id)
    return asyncio.run(async_wrapper(mm.process_voice_message, audio, uid))
def process_image(image, user_id):
    """Synchronous Gradio callback: describe an uploaded image for *user_id*."""
    uid = int(user_id)
    coro = async_wrapper(mm.process_image_message, image, uid)
    return asyncio.run(coro)
def chat(text, user_id, lang):
    """Synchronous Gradio callback: generate a text reply in *lang*."""
    uid = int(user_id)
    coro = async_wrapper(mm.generate_response, text, uid, lang)
    return asyncio.run(coro)
def generate_image(prompt, user_id):
    """Synchronous Gradio callback: create an image from a text *prompt*."""
    uid = int(user_id)
    return asyncio.run(async_wrapper(mm.generate_image_from_text, prompt, uid))
# ---------------------------------------------------------------------------
# UI layout. Emoji labels are written as proper UTF-8 — the previous revision
# carried mojibake ("πŸš€" etc.) from a bad encoding round-trip.
# ---------------------------------------------------------------------------
with gr.Blocks(title="Multimodal AI Assistant") as app:
    gr.Markdown("## 🚀 Multimodal AI Assistant")

    # Text chat: message box plus per-user language selection.
    with gr.Tab("💬 Text Chat"):
        with gr.Row():
            user_id = gr.Textbox(label="User ID", value="123")
            lang = gr.Dropdown(["en", "es", "fr", "de"], label="Language", value="en")
        chat_input = gr.Textbox(label="Your Message")
        chat_output = gr.Textbox(label="AI Response", interactive=False)
        chat_btn = gr.Button("Send")

    # Voice: record or upload audio; analysis comes back as structured JSON.
    with gr.Tab("🎙️ Voice"):
        voice_input = gr.Audio(sources=["microphone", "upload"], type="filepath")
        voice_user = gr.Textbox(label="User ID", value="123")
        voice_output = gr.JSON(label="Analysis Results")
        voice_btn = gr.Button("Process")

    # Images: two sub-tabs — describe an uploaded image, or generate one.
    with gr.Tab("🖼️ Images"):
        with gr.Tab("Describe"):
            img_input = gr.Image(type="filepath")
            img_user = gr.Textbox(label="User ID", value="123")
            img_output = gr.Textbox(label="Description")
            img_btn = gr.Button("Describe")
        with gr.Tab("Generate"):
            gen_prompt = gr.Textbox(label="Prompt")
            gen_user = gr.Textbox(label="User ID", value="123")
            gen_output = gr.Image(label="Generated Image")
            gen_btn = gr.Button("Generate")

    # Wire each button to its synchronous wrapper. Registration happens
    # inside the Blocks context so the events attach to `app`.
    chat_btn.click(chat, [chat_input, user_id, lang], chat_output)
    voice_btn.click(process_voice, [voice_input, voice_user], voice_output)
    img_btn.click(process_image, [img_input, img_user], img_output)
    gen_btn.click(generate_image, [gen_prompt, gen_user], gen_output)
# Launch the UI only when executed as a script (not on import).
if __name__ == "__main__":
    app.launch(
        server_name="0.0.0.0",  # listen on all interfaces (container/Space friendly)
        server_port=7860,  # Gradio's default port
        share=False  # no public Gradio share tunnel
    )