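"""Gradio front end for MultiModalChatModule.

Exposes text chat, voice analysis, image description, image generation, and
document processing through a tabbed UI, suitable for running as a Hugging
Face Space or locally.
"""
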
import os
import shutil
import asyncio
import gradio as gr
from multimodal_module import MultiModalChatModule

# Environment configuration: set before initializing the module so that model
# downloads and tokenizer usage pick these settings up
os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Initialize the multimodal backend once at startup
mm = MultiModalChatModule()

# A tiny async-compatible "file-like" wrapper so that multimodal_module methods
# (which expect objects exposing an async download_to_drive(...) method) can
# also accept the plain file paths returned by Gradio components.
class AsyncPathWrapper:
    def __init__(self, path: str):
        self.path = path

    async def download_to_drive(self, dst_path: str):
        # Copy synchronously while keeping the async signature the module expects.
        dst_dir = os.path.dirname(dst_path)
        if dst_dir:
            os.makedirs(dst_dir, exist_ok=True)
        shutil.copy(self.path, dst_path)
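
# Illustrative usage (the paths below are hypothetical, not taken from the module):
#
#   wrapped = AsyncPathWrapper("/tmp/gradio/recording.wav")
#   await wrapped.download_to_drive("downloads/recording.wav")
#
# i.e. the wrapper exposes exactly the async download_to_drive(path) call that
# the module's processing methods invoke on incoming files.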

# Helper to call async methods from sync Gradio callbacks
def run_async(fn, *args, **kwargs):
    return asyncio.run(fn(*args, **kwargs))
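
# asyncio.run() starts a fresh event loop for each call. Gradio runs these
# synchronous callbacks in worker threads (no event loop is running there), so
# this is safe as long as MultiModalChatModule does not hold state bound to a
# particular event loop between calls, which is assumed here.
#
# Example: run_async(mm.generate_response, "hello", 123, "en")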

# Wrappers that adapt Gradio-returned file paths to the module's expected interface
def _wrap_audio(audio_path):
    if not audio_path:
        return None
    return AsyncPathWrapper(audio_path)

def _wrap_image(image_path):
    if not image_path:
        return None
    return AsyncPathWrapper(image_path)

def _wrap_file(file_obj):
    # gr.File may return a path string or a tempfile-like object with a .name
    # attribute, depending on the Gradio version and component configuration.
    path = file_obj if isinstance(file_obj, str) else getattr(file_obj, "name", None)
    return AsyncPathWrapper(path) if path else None

# Gradio binding functions
def process_voice(audio_filepath, user_id):
    # mm.process_voice_message expects an object with download_to_drive
    wrapped = _wrap_audio(audio_filepath)
    return run_async(mm.process_voice_message, wrapped, int(user_id))

def process_image(image_filepath, user_id):
    wrapped = _wrap_image(image_filepath)
    return run_async(mm.process_image_message, wrapped, int(user_id))

def chat(text, user_id, lang):
    return run_async(mm.generate_response, text, int(user_id), lang)

def generate_image(prompt, user_id):
    return run_async(mm.generate_image_from_text, prompt, int(user_id))

def process_file(file_path, user_id):
    wrapped = _wrap_file(file_path)
    return run_async(mm.process_file, wrapped, int(user_id))
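
# Note: int(user_id) raises ValueError on non-numeric input, and any exception
# raised inside the module propagates out of these wrappers; Gradio reports it
# as an error in the UI instead of crashing the app.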

with gr.Blocks(title="Multimodal AI Assistant") as app:
    gr.Markdown("## πŸš€ Multimodal AI Assistant (Space-friendly)")

    with gr.Tab("πŸ’¬ Text Chat"):
        with gr.Row():
            user_id_txt = gr.Textbox(label="User ID", value="123")
            lang = gr.Dropdown(["en", "es", "fr", "de"], label="Language", value="en")
        chat_input = gr.Textbox(label="Your Message")
        chat_output = gr.Textbox(label="AI Response", interactive=False)
        chat_btn = gr.Button("Send")
        chat_btn.click(fn=chat, inputs=[chat_input, user_id_txt, lang], outputs=chat_output)

    with gr.Tab("πŸŽ™οΈ Voice"):
        voice_input = gr.Audio(source="microphone", type="filepath", label="Speak or upload an audio file")
        voice_user = gr.Textbox(label="User ID", value="123")
        voice_output = gr.JSON(label="Analysis Results")
        voice_btn = gr.Button("Process")
        voice_btn.click(fn=process_voice, inputs=[voice_input, voice_user], outputs=voice_output)

    with gr.Tab("πŸ–ΌοΈ Images"):
        with gr.Tab("Describe"):
            img_input = gr.Image(type="filepath", label="Upload an image")
            img_user = gr.Textbox(label="User ID", value="123")
            img_output = gr.Textbox(label="Description")
            img_btn = gr.Button("Describe")
            img_btn.click(fn=process_image, inputs=[img_input, img_user], outputs=img_output)

        with gr.Tab("Generate"):
            gen_prompt = gr.Textbox(label="Prompt")
            gen_user = gr.Textbox(label="User ID", value="123")
            gen_output = gr.Image(label="Generated Image")
            gen_btn = gr.Button("Generate")
            gen_btn.click(fn=generate_image, inputs=[gen_prompt, gen_user], outputs=gen_output)

    with gr.Tab("πŸ“„ Files"):
        file_input = gr.File(file_count="single", label="Upload a document (pdf, txt, docx)")
        file_user = gr.Textbox(label="User ID", value="123")
        file_output = gr.JSON(label="File Processing Result")
        file_btn = gr.Button("Process File")
        file_btn.click(fn=process_file, inputs=[file_input, file_user], outputs=file_output)

if __name__ == "__main__":
    # Bind to all interfaces and honor the PORT env var so the app runs both on
    # Hugging Face Spaces and locally (default port 7860).
    app.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))