Princeaka committed on
Commit bb4627c · verified · 1 Parent(s): 052fcb6

Update app.py

Files changed (1)
  1. app.py +217 -108
app.py CHANGED
@@ -1,117 +1,226 @@
 import os
 import shutil
 import asyncio
 import gradio as gr
 from multimodal_module import MultiModalChatModule

-# Initialize module
-mm = MultiModalChatModule()
-
-# Environment configuration (already safe but keep)
-os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-
-os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # Disable GPU
-
-os.environ["IMAGEIO_FFMPEG_EXE"] = "/usr/bin/ffmpeg" # Explicit path
-os.environ["FFMPEG_BINARY"] = "/usr/bin/ffmpeg" # Backup for older versions
-
-# A tiny async-compatible "file-like" wrapper so your multimodal_module methods
-# (which expect objects with an async download_to_drive(...) method) work
-class AsyncPathWrapper:
-    def __init__(self, path: str):
-        self.path = path
-
-    async def download_to_drive(self, dst_path: str):
-        # perform copy synchronously but keep API async
-        try:
-            os.makedirs(os.path.dirname(dst_path), exist_ok=True)
-            shutil.copy(self.path, dst_path)
-        except Exception as e:
-            # raise to allow upper-level error handling
-            raise
-
-# Helper to call async methods from sync Gradio callbacks
-def run_async(fn, *args, **kwargs):
-    return asyncio.run(fn(*args, **kwargs))
-
-# Wrappers that adapt Gradio returned file paths to the module's expected interface
-def _wrap_audio(audio_path):
-    if not audio_path:
-        return None
-    return AsyncPathWrapper(audio_path)
-
-def _wrap_image(image_path):
-    if not image_path:
-        return None
-    return AsyncPathWrapper(image_path)
-
-def _wrap_file(file_path):
-    if not file_path:
-        return None
-    return AsyncPathWrapper(file_path)
-
-# Gradio binding functions
-def process_voice(audio_filepath, user_id):
-    # mm.process_voice_message expects an object with download_to_drive
-    wrapped = _wrap_audio(audio_filepath)
-    return run_async(mm.process_voice_message, wrapped, int(user_id))
-
-def process_image(image_filepath, user_id):
-    wrapped = _wrap_image(image_filepath)
-    return run_async(mm.process_image_message, wrapped, int(user_id))
-
-def chat(text, user_id, lang):
-    return run_async(mm.generate_response, text, int(user_id), lang)
-
-def generate_image(prompt, user_id):
-    return run_async(mm.generate_image_from_text, prompt, int(user_id))
-
-def process_file(file_path, user_id):
-    wrapped = _wrap_file(file_path)
-    return run_async(mm.process_file, wrapped, int(user_id))
-
-with gr.Blocks(title="Multimodal AI Assistant") as app:
-    gr.Markdown("## 🚀 Multimodal AI Assistant (Space-friendly)")
-
-    with gr.Tab("💬 Text Chat"):
         with gr.Row():
-            user_id_txt = gr.Textbox(label="User ID", value="123")
-            lang = gr.Dropdown(["en", "es", "fr", "de"], label="Language", value="en")
-        chat_input = gr.Textbox(label="Your Message")
-        chat_output = gr.Textbox(label="AI Response", interactive=False)
-        chat_btn = gr.Button("Send")
-        chat_btn.click(fn=chat, inputs=[chat_input, user_id_txt, lang], outputs=chat_output)
-
-    with gr.Tab("🎙️ Voice"):
-        voice_input = gr.Audio(source="microphone", type="filepath", label="Speak or upload an audio file")
-        voice_user = gr.Textbox(label="User ID", value="123")
-        voice_output = gr.JSON(label="Analysis Results")
-        voice_btn = gr.Button("Process")
-        voice_btn.click(fn=process_voice, inputs=[voice_input, voice_user], outputs=voice_output)
-
-    with gr.Tab("🖼️ Images"):
-        with gr.Tab("Describe"):
-            img_input = gr.Image(type="filepath", label="Upload an image")
-            img_user = gr.Textbox(label="User ID", value="123")
-            img_output = gr.Textbox(label="Description")
-            img_btn = gr.Button("Describe")
-            img_btn.click(fn=process_image, inputs=[img_input, img_user], outputs=img_output)
-
-        with gr.Tab("Generate"):
-            gen_prompt = gr.Textbox(label="Prompt")
-            gen_user = gr.Textbox(label="User ID", value="123")
-            gen_output = gr.Image(label="Generated Image")
-            gen_btn = gr.Button("Generate")
-            gen_btn.click(fn=generate_image, inputs=[gen_prompt, gen_user], outputs=gen_output)
-
-    with gr.Tab("📄 Files"):
-        file_input = gr.File(file_count="single", label="Upload a document (pdf, txt, docx)")
-        file_user = gr.Textbox(label="User ID", value="123")
-        file_output = gr.JSON(label="File Processing Result")
         file_btn = gr.Button("Process File")
-        file_btn.click(fn=process_file, inputs=[file_input, file_user], outputs=file_output)

-if __name__ == "__main__":
-    # Let Spaces manage server settings. This still works locally.
-    app.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
+# app.py - Gradio wrapper for your existing multimodal_module.py (unchanged)
 import os
 import shutil
 import asyncio
+import json
+from typing import Optional
+
 import gradio as gr
+
+# Import your multimodal module exactly as-is
 from multimodal_module import MultiModalChatModule

+# Instantiate your AI (will lazy-load inside your module)
+AI = MultiModalChatModule()
+
+# ------------------------------------------------------------------
+# Helpers / adaptation layer
+# Your multimodal methods expect an object that has an async
+# `download_to_drive(path)` method. Gradio file upload gives us a
+# local temp file path (with attribute .name). We'll wrap it.
+# ------------------------------------------------------------------
+class GradioFileWrapper:
+    def __init__(self, gr_file):
+        """
+        gr_file: Gradio UploadedFile object or path string
+        - In Gradio, the value passed is a dict/path or a tempfile Path object.
+        """
+        # If gradio passes a dict with "name" or direct path string, handle both.
+        self._path = None
+        if isinstance(gr_file, str):
+            # already a path
+            self._path = gr_file
+        else:
+            # gradio may give a file-like object with .name attribute
+            try:
+                self._path = gr_file.name # typical for gradio
+            except Exception:
+                # fallback: convert dict to path if needed
+                try:
+                    self._path = gr_file["name"]
+                except Exception:
+                    raise ValueError("Unsupported file object from Gradio")
+
+    async def download_to_drive(self, dst_path: str) -> None:
+        # Asynchronous signature to match your module's expectations.
+        # We will copy the local file path to dst_path.
+        # Gradio stores the uploaded file locally, so simple copy works.
+        loop = asyncio.get_event_loop()
+        await loop.run_in_executor(None, shutil.copyfile, self._path, dst_path)
+
+# Small helper to call async functions from sync Gradio callbacks
+def run_async(coro):
+    return asyncio.run(coro)
+
+# ------------------------------------------------------------------
+# Gradio callback wrappers
+# ------------------------------------------------------------------
+
+def text_chat(user_id: Optional[int], text: str, lang: str = "en"):
+    try:
+        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        reply = run_async(AI.generate_response(text, uid, lang))
+        return reply
+    except Exception as e:
+        return f"Error: {e}"
+
+def voice_process(user_id: Optional[int], audio_file):
+    try:
+        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        wrapper = GradioFileWrapper(audio_file)
+        result = run_async(AI.process_voice_message(wrapper, uid))
+        # return a readable text blob with details
+        return json.dumps(result, ensure_ascii=False, indent=2)
+    except Exception as e:
+        return f"Error: {e}"
+
+def generate_voice(user_id: Optional[int], reply_text: str, fmt: str = "ogg"):
+    try:
+        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        path = run_async(AI.generate_voice_reply(reply_text, uid, fmt))
+        # Gradio audio accepts a path
+        return path
+    except Exception as e:
+        return None, f"Error: {e}"
+
+def image_caption(user_id: Optional[int], image_file):
+    try:
+        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        wrapper = GradioFileWrapper(image_file)
+        caption = run_async(AI.process_image_message(wrapper, uid))
+        return caption
+    except Exception as e:
+        return f"Error: {e}"
+
+def generate_image(user_id: Optional[int], prompt: str, width: int = 512, height: int = 512, steps: int = 30):
+    try:
+        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        path = run_async(AI.generate_image_from_text(prompt, uid, width=width, height=height, steps=steps))
+        return path
+    except Exception as e:
+        return f"Error: {e}"
+
+def edit_image(user_id: Optional[int], image_file, mask_file, prompt: str = ""):
+    try:
+        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        img_w = GradioFileWrapper(image_file)
+        mask_w = GradioFileWrapper(mask_file) if mask_file not in (None, "", "None") else None
+        path = run_async(AI.edit_image_inpaint(img_w, mask_w, prompt, uid))
+        return path
+    except Exception as e:
+        return f"Error: {e}"
+
+def process_video(user_id: Optional[int], video_file):
+    try:
+        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        wrapper = GradioFileWrapper(video_file)
+        res = run_async(AI.process_video(wrapper, uid))
+        return json.dumps(res, ensure_ascii=False, indent=2)
+    except Exception as e:
+        return f"Error: {e}"
+
+def process_file(user_id: Optional[int], file_obj):
+    try:
+        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        w = GradioFileWrapper(file_obj)
+        res = run_async(AI.process_file(w, uid))
+        return json.dumps(res, ensure_ascii=False, indent=2)
+    except Exception as e:
+        return f"Error: {e}"
+
+def code_complete(user_id: Optional[int], prompt: str, max_tokens: int = 512):
+    try:
+        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        out = run_async(AI.code_complete(prompt, max_tokens=max_tokens))
+        return out
+    except Exception as e:
+        return f"Error: {e}"
+
+# ------------------------------------------------------------------
+# Gradio UI
+# ------------------------------------------------------------------
+with gr.Blocks(title="Multimodal Bot (uses your multimodal_module.py)") as demo:
+    gr.Markdown("# Multimodal Bot\nThis Space uses the exact `multimodal_module.py` you uploaded. Use the tabs below.")
+    with gr.Tab("Text Chat"):
+        with gr.Row():
+            user_id_txt = gr.Textbox(label="User ID (optional)", placeholder="0")
+            lang_sel = gr.Dropdown(choices=["en","zh","ja","ko","es","fr","de","it"], value="en", label="Language")
+            txt_in = gr.Textbox(label="User text", lines=4)
+            txt_out = gr.Textbox(label="Reply", lines=6)
+        txt_btn = gr.Button("Send")
+        txt_btn.click(fn=text_chat, inputs=[user_id_txt, txt_in, lang_sel], outputs=txt_out)
+
+    with gr.Tab("Voice (transcribe + emotion)"):
+        with gr.Row():
+            user_id_voice = gr.Textbox(label="User ID (optional)", placeholder="0")
+            voice_in = gr.Audio(source="upload", type="filepath", label="Upload voice (.ogg/.wav)")
+            voice_out = gr.Textbox(label="Result JSON")
+        voice_btn = gr.Button("Process Voice")
+        voice_btn.click(fn=voice_process, inputs=[user_id_voice, voice_in], outputs=voice_out)
+
+    with gr.Tab("Voice Reply (TTS)"):
+        with gr.Row():
+            user_id_vr = gr.Textbox(label="User ID (optional)", placeholder="0")
+            vr_text = gr.Textbox(label="Text to convert to voice", lines=4)
+            vr_fmt = gr.Dropdown(choices=["ogg","wav","mp3"], value="ogg", label="Format")
+            vr_audio = gr.Audio(label="Generated Voice")
+        vr_btn = gr.Button("Generate Voice")
+        vr_btn.click(fn=generate_voice, inputs=[user_id_vr, vr_text, vr_fmt], outputs=vr_audio)
+
+    with gr.Tab("Image Caption"):
+        with gr.Row():
+            user_id_img = gr.Textbox(label="User ID (optional)", placeholder="0")
+            img_in = gr.Image(type="filepath", label="Upload Image")
+            img_out = gr.Textbox(label="Caption")
+        img_btn = gr.Button("Caption Image")
+        img_btn.click(fn=image_caption, inputs=[user_id_img, img_in], outputs=img_out)
+
+    with gr.Tab("Image Generate"):
         with gr.Row():
+            user_id_gi = gr.Textbox(label="User ID (optional)", placeholder="0")
+            prompt_in = gr.Textbox(label="Prompt", lines=3)
+            width = gr.Slider(minimum=256, maximum=1024, step=64, value=512, label="Width")
+            height = gr.Slider(minimum=256, maximum=1024, step=64, value=512, label="Height")
+            steps = gr.Slider(minimum=10, maximum=50, step=5, value=30, label="Steps")
+            gen_out = gr.Image(type="filepath", label="Generated image")
+        gen_btn = gr.Button("Generate")
+        gen_btn.click(fn=generate_image, inputs=[user_id_gi, prompt_in, width, height, steps], outputs=gen_out)
+
+    with gr.Tab("Image Edit (Inpaint)"):
+        with gr.Row():
+            user_id_ie = gr.Textbox(label="User ID (optional)", placeholder="0")
+            edit_img = gr.Image(type="filepath", label="Image to edit")
+            edit_mask = gr.Image(type="filepath", label="Mask (white=edit black=keep) (optional)")
+            edit_prompt = gr.Textbox(label="Prompt (what to paint)", lines=2)
+            edit_out = gr.Image(type="filepath", label="Edited image")
+        edit_btn = gr.Button("Edit Image")
+        edit_btn.click(fn=edit_image, inputs=[user_id_ie, edit_img, edit_mask, edit_prompt], outputs=edit_out)
+
+    with gr.Tab("Video"):
+        with gr.Row():
+            user_id_vid = gr.Textbox(label="User ID (optional)", placeholder="0")
+            vid_in = gr.Video(label="Upload video")
+            vid_out = gr.Textbox(label="Result JSON")
+        vid_btn = gr.Button("Process Video")
+        vid_btn.click(fn=process_video, inputs=[user_id_vid, vid_in], outputs=vid_out)
+
+    with gr.Tab("Files (PDF/DOCX/TXT)"):
+        with gr.Row():
+            user_id_file = gr.Textbox(label="User ID (optional)", placeholder="0")
+            file_in = gr.File(label="Upload file")
+            file_out = gr.Textbox(label="Result JSON")
         file_btn = gr.Button("Process File")
+        file_btn.click(fn=process_file, inputs=[user_id_file, file_in], outputs=file_out)
+
+    with gr.Tab("Code (StarCoder)"):
+        with gr.Row():
+            user_id_code = gr.Textbox(label="User ID (optional)", placeholder="0")
+            code_prompt = gr.Textbox(label="Prompt for code generation", lines=6)
+            code_out = gr.Textbox(label="Generated code", lines=12)
+        code_btn = gr.Button("Generate Code")
+        code_btn.click(fn=code_complete, inputs=[user_id_code, code_prompt], outputs=code_out)
+
+    gr.Markdown("----\nYour underlying `multimodal_module.py` is used exactly as uploaded. Be patient on first calls — heavy models load lazily.")

+# Launch
+demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
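
The new adaptation layer rests on one contract: whatever object reaches multimodal_module must expose an async download_to_drive(dst_path) method, and the synchronous Gradio callbacks drive it through asyncio.run (the run_async helper). The sketch below is a minimal, self-contained way to exercise that contract without launching the UI, assuming only the standard library. PathWrapper is an inline stand-in that mirrors GradioFileWrapper.download_to_drive from this commit, and save_upload is a hypothetical consumer standing in for how multimodal_module presumably awaits the method; neither name comes from the repo.

import asyncio
import os
import shutil
import tempfile


class PathWrapper:
    # Inline stand-in mirroring GradioFileWrapper.download_to_drive from this commit.
    def __init__(self, path: str):
        self._path = path

    async def download_to_drive(self, dst_path: str) -> None:
        # Copy in a worker thread so the running event loop is never blocked.
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, shutil.copyfile, self._path, dst_path)


async def save_upload(upload, dst_path: str) -> str:
    # Hypothetical consumer: multimodal_module presumably awaits download_to_drive like this.
    await upload.download_to_drive(dst_path)
    return dst_path


if __name__ == "__main__":
    with tempfile.TemporaryDirectory() as tmp:
        src = os.path.join(tmp, "voice.ogg")
        dst = os.path.join(tmp, "copy.ogg")
        with open(src, "wb") as f:
            f.write(b"fake audio bytes")
        # Same pattern as run_async() in app.py: drive the coroutine from sync code.
        saved = asyncio.run(save_upload(PathWrapper(src), dst))
        assert os.path.getsize(saved) == os.path.getsize(src)
        print("download_to_drive contract OK:", saved)

If a check like this passes, any Gradio value that GradioFileWrapper can resolve to a local path should be safe to hand to the module's async methods.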