Spaces:

Princeaka
/

multimodal_module

Building

App Files Files Community

Princeaka committed 2 days ago

Commit

1afccba

verified ·

1 Parent(s): 72ec8f4

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -34

app.py CHANGED Viewed

@@ -1,74 +1,112 @@
 import os
 import gradio as gr
 from multimodal_module import MultiModalChatModule
-import asyncio
 # Initialize module
 mm = MultiModalChatModule()
-# Environment configuration
 os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
-async def async_wrapper(fn, *args):
-    """Handle async calls from Gradio"""
-    return await fn(*args)
-def process_voice(audio, user_id):
-    return asyncio.run(async_wrapper(mm.process_voice_message, audio, int(user_id)))
-def process_image(image, user_id):
-    return asyncio.run(async_wrapper(mm.process_image_message, image, int(user_id)))
 def chat(text, user_id, lang):
-    return asyncio.run(async_wrapper(mm.generate_response, text, int(user_id), lang))
 def generate_image(prompt, user_id):
-    return asyncio.run(async_wrapper(
-        mm.generate_image_from_text,
-        prompt,
-        int(user_id)
-    ))
 with gr.Blocks(title="Multimodal AI Assistant") as app:
-    gr.Markdown("## 🚀 Multimodal AI Assistant")
     with gr.Tab("💬 Text Chat"):
         with gr.Row():
-            user_id = gr.Textbox(label="User ID", value="123")
             lang = gr.Dropdown(["en", "es", "fr", "de"], label="Language", value="en")
         chat_input = gr.Textbox(label="Your Message")
         chat_output = gr.Textbox(label="AI Response", interactive=False)
         chat_btn = gr.Button("Send")
     with gr.Tab("🎙️ Voice"):
-        voice_input = gr.Audio(sources=["microphone", "upload"], type="filepath")
         voice_user = gr.Textbox(label="User ID", value="123")
         voice_output = gr.JSON(label="Analysis Results")
         voice_btn = gr.Button("Process")
     with gr.Tab("🖼️ Images"):
         with gr.Tab("Describe"):
-            img_input = gr.Image(type="filepath")
             img_user = gr.Textbox(label="User ID", value="123")
             img_output = gr.Textbox(label="Description")
             img_btn = gr.Button("Describe")
         with gr.Tab("Generate"):
             gen_prompt = gr.Textbox(label="Prompt")
             gen_user = gr.Textbox(label="User ID", value="123")
             gen_output = gr.Image(label="Generated Image")
             gen_btn = gr.Button("Generate")
-    # Event handlers
-    chat_btn.click(chat, [chat_input, user_id, lang], chat_output)
-    voice_btn.click(process_voice, [voice_input, voice_user], voice_output)
-    img_btn.click(process_image, [img_input, img_user], img_output)
-    gen_btn.click(generate_image, [gen_prompt, gen_user], gen_output)
 if __name__ == "__main__":
-    app.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        share=False
-    )

 import os
+import shutil
+import asyncio
 import gradio as gr
 from multimodal_module import MultiModalChatModule
 # Initialize module
 mm = MultiModalChatModule()
+# Environment configuration: set HF_HUB_DISABLE_TELEMETRY and
+# TOKENIZERS_PARALLELISM for any library that reads them.
+# NOTE(review): these assignments run after multimodal_module has been
+# imported and MultiModalChatModule() has been constructed; a library that
+# reads the variables at import/initialisation time may not see them here.
+# Confirm whether they need to move above the imports.
 os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
+# A tiny async-compatible "file-like" wrapper so your multimodal_module methods
+# (which expect objects with an async download_to_drive(...) method) work
+class AsyncPathWrapper:
+    """Wrap a local file path behind an async ``download_to_drive`` method.
+    Gradio hands callbacks a plain file path; this adapter gives that path
+    the awaitable ``download_to_drive(dst_path)`` interface.
+    """
+    def __init__(self, path: str):
+        # Path of the source file on local disk.
+        self.path = path
+    async def download_to_drive(self, dst_path: str):
+        """Copy the wrapped file to ``dst_path``.
+        The copy itself is synchronous; the method is ``async`` only so it
+        can be awaited by callers. Missing parent directories of
+        ``dst_path`` are created. Any ``OSError`` from the directory
+        creation or the copy propagates to the caller.
+        """
+        # os.path.dirname() returns "" for a bare filename, and
+        # os.makedirs("") raises FileNotFoundError, so only create the
+        # parent directory when there is one.
+        parent = os.path.dirname(dst_path)
+        if parent:
+            os.makedirs(parent, exist_ok=True)
+        shutil.copy(self.path, dst_path)
+# Helper to call async methods from sync Gradio callbacks
+def run_async(fn, *args, **kwargs):
+    """Call the coroutine function ``fn`` with the given arguments and
+    block until it finishes, returning its result via ``asyncio.run``."""
+    coroutine = fn(*args, **kwargs)
+    return asyncio.run(coroutine)
+# Wrappers that adapt Gradio returned file paths to the module's expected interface
+def _wrap_audio(audio_path):
+    """Return an AsyncPathWrapper for ``audio_path``, or None when the
+    path is empty or missing."""
+    return AsyncPathWrapper(audio_path) if audio_path else None
+def _wrap_image(image_path):
+    """Return an AsyncPathWrapper for ``image_path``, or None when the
+    path is empty or missing."""
+    return AsyncPathWrapper(image_path) if image_path else None
+def _wrap_file(file_path):
+    """Return an AsyncPathWrapper for ``file_path``, or None when the
+    path is empty or missing."""
+    return AsyncPathWrapper(file_path) if file_path else None
+# Gradio binding functions
+def process_voice(audio_filepath, user_id):
+    """Gradio callback: wrap the audio path and pass it, with the integer
+    user id, to ``mm.process_voice_message``; return that call's result."""
+    # The wrapper supplies the download_to_drive interface the module uses.
+    audio_obj = _wrap_audio(audio_filepath)
+    uid = int(user_id)
+    return run_async(mm.process_voice_message, audio_obj, uid)
+def process_image(image_filepath, user_id):
+    """Gradio callback: wrap the image path and pass it, with the integer
+    user id, to ``mm.process_image_message``; return that call's result."""
+    image_obj = _wrap_image(image_filepath)
+    uid = int(user_id)
+    return run_async(mm.process_image_message, image_obj, uid)
 def chat(text, user_id, lang):
+    """Gradio callback: run ``mm.generate_response`` with the message text,
+    the integer user id and the language code; return its result."""
+    uid = int(user_id)
+    return run_async(mm.generate_response, text, uid, lang)
 def generate_image(prompt, user_id):
+    """Gradio callback: run ``mm.generate_image_from_text`` with the prompt
+    and the integer user id; return its result."""
+    uid = int(user_id)
+    return run_async(mm.generate_image_from_text, prompt, uid)
+def process_file(file_path, user_id):
+    """Gradio callback: wrap the uploaded file path and pass it, with the
+    integer user id, to ``mm.process_file``; return that call's result."""
+    file_obj = _wrap_file(file_path)
+    uid = int(user_id)
+    return run_async(mm.process_file, file_obj, uid)
 with gr.Blocks(title="Multimodal AI Assistant") as app:
+    # Each tab builds its components and wires its button to the matching
+    # callback right after the components are created.
+    gr.Markdown("## 🚀 Multimodal AI Assistant (Space-friendly)")
     with gr.Tab("💬 Text Chat"):
         with gr.Row():
+            user_id_txt = gr.Textbox(label="User ID", value="123")
             lang = gr.Dropdown(["en", "es", "fr", "de"], label="Language", value="en")
         chat_input = gr.Textbox(label="Your Message")
         chat_output = gr.Textbox(label="AI Response", interactive=False)
         chat_btn = gr.Button("Send")
+        chat_btn.click(fn=chat, inputs=[chat_input, user_id_txt, lang], outputs=chat_output)
     with gr.Tab("🎙️ Voice"):
+        # Use the `sources` list form, as the previous revision of this file
+        # did, and keep "upload" enabled so the widget matches its label.
+        voice_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Speak or upload an audio file")
         voice_user = gr.Textbox(label="User ID", value="123")
         voice_output = gr.JSON(label="Analysis Results")
         voice_btn = gr.Button("Process")
+        voice_btn.click(fn=process_voice, inputs=[voice_input, voice_user], outputs=voice_output)
     with gr.Tab("🖼️ Images"):
         with gr.Tab("Describe"):
+            img_input = gr.Image(type="filepath", label="Upload an image")
             img_user = gr.Textbox(label="User ID", value="123")
             img_output = gr.Textbox(label="Description")
             img_btn = gr.Button("Describe")
+            img_btn.click(fn=process_image, inputs=[img_input, img_user], outputs=img_output)
         with gr.Tab("Generate"):
             gen_prompt = gr.Textbox(label="Prompt")
             gen_user = gr.Textbox(label="User ID", value="123")
             gen_output = gr.Image(label="Generated Image")
             gen_btn = gr.Button("Generate")
+            gen_btn.click(fn=generate_image, inputs=[gen_prompt, gen_user], outputs=gen_output)
+    with gr.Tab("📄 Files"):
+        file_input = gr.File(file_count="single", label="Upload a document (pdf, txt, docx)")
+        file_user = gr.Textbox(label="User ID", value="123")
+        file_output = gr.JSON(label="File Processing Result")
+        file_btn = gr.Button("Process File")
+        file_btn.click(fn=process_file, inputs=[file_input, file_user], outputs=file_output)
 if __name__ == "__main__":
+    # Listen on all interfaces; the port comes from the PORT environment
+    # variable when it is set and falls back to 7860 otherwise.
+    port = int(os.environ.get("PORT", 7860))
+    app.launch(server_name="0.0.0.0", server_port=port)