Spaces:

Princeaka
/

multimodal_module

Running

App Files Files Community

Princeaka committed 4 days ago

Commit

9858a63

verified ·

1 Parent(s): 0174295

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -92

app.py CHANGED Viewed

@@ -6,59 +6,40 @@ import json
 from typing import Optional
 import gradio as gr
-# Import your multimodal module exactly as-is
 from multimodal_module import MultiModalChatModule
-# Instantiate your AI (will lazy-load inside your module)
 AI = MultiModalChatModule()
 # ------------------------------------------------------------------
-# Helpers / adaptation layer
-# Your multimodal methods expect an object that has an async
-# `download_to_drive(path)` method. Gradio file upload gives us a
-# local temp file path (with attribute .name). We'll wrap it.
 # ------------------------------------------------------------------
 class GradioFileWrapper:
     def __init__(self, gr_file):
-        """
-        gr_file: Gradio UploadedFile object or path string
-        - In Gradio, the value passed is a dict/path or a tempfile Path object.
-        """
-        # If gradio passes a dict with "name" or direct path string, handle both.
-        self._path = None
         if isinstance(gr_file, str):
-            # already a path
             self._path = gr_file
         else:
-            # gradio may give a file-like object with .name attribute
             try:
-                self._path = gr_file.name  # typical for gradio
             except Exception:
-                # fallback: convert dict to path if needed
                 try:
                     self._path = gr_file["name"]
                 except Exception:
                     raise ValueError("Unsupported file object from Gradio")
     async def download_to_drive(self, dst_path: str) -> None:
-        # Asynchronous signature to match your module's expectations.
-        # We will copy the local file path to dst_path.
-        # Gradio stores the uploaded file locally, so simple copy works.
         loop = asyncio.get_event_loop()
         await loop.run_in_executor(None, shutil.copyfile, self._path, dst_path)
-# Small helper to call async functions from sync Gradio callbacks
 def run_async(coro):
     return asyncio.run(coro)
 # ------------------------------------------------------------------
-# Gradio callback wrappers
 # ------------------------------------------------------------------
 def text_chat(user_id: Optional[int], text: str, lang: str = "en"):
     try:
-        uid = int(user_id) if user_id not in (None, "", "None") else 0
         reply = run_async(AI.generate_response(text, uid, lang))
         return reply
     except Exception as e:
@@ -66,26 +47,24 @@ def text_chat(user_id: Optional[int], text: str, lang: str = "en"):
 def voice_process(user_id: Optional[int], audio_file):
     try:
-        uid = int(user_id) if user_id not in (None, "", "None") else 0
         wrapper = GradioFileWrapper(audio_file)
         result = run_async(AI.process_voice_message(wrapper, uid))
-        # return a readable text blob with details
         return json.dumps(result, ensure_ascii=False, indent=2)
     except Exception as e:
         return f"Error: {e}"
 def generate_voice(user_id: Optional[int], reply_text: str, fmt: str = "ogg"):
     try:
-        uid = int(user_id) if user_id not in (None, "", "None") else 0
         path = run_async(AI.generate_voice_reply(reply_text, uid, fmt))
-        # Gradio audio accepts a path
         return path
     except Exception as e:
         return None, f"Error: {e}"
 def image_caption(user_id: Optional[int], image_file):
     try:
-        uid = int(user_id) if user_id not in (None, "", "None") else 0
         wrapper = GradioFileWrapper(image_file)
         caption = run_async(AI.process_image_message(wrapper, uid))
         return caption
@@ -94,7 +73,7 @@ def image_caption(user_id: Optional[int], image_file):
 def generate_image(user_id: Optional[int], prompt: str, width: int = 512, height: int = 512, steps: int = 30):
     try:
-        uid = int(user_id) if user_id not in (None, "", "None") else 0
         path = run_async(AI.generate_image_from_text(prompt, uid, width=width, height=height, steps=steps))
         return path
     except Exception as e:
@@ -102,9 +81,9 @@ def generate_image(user_id: Optional[int], prompt: str, width: int = 512, height
 def edit_image(user_id: Optional[int], image_file, mask_file, prompt: str = ""):
     try:
-        uid = int(user_id) if user_id not in (None, "", "None") else 0
         img_w = GradioFileWrapper(image_file)
-        mask_w = GradioFileWrapper(mask_file) if mask_file not in (None, "", "None") else None
         path = run_async(AI.edit_image_inpaint(img_w, mask_w, prompt, uid))
         return path
     except Exception as e:
@@ -112,7 +91,7 @@ def edit_image(user_id: Optional[int], image_file, mask_file, prompt: str = ""):
 def process_video(user_id: Optional[int], video_file):
     try:
-        uid = int(user_id) if user_id not in (None, "", "None") else 0
         wrapper = GradioFileWrapper(video_file)
         res = run_async(AI.process_video(wrapper, uid))
         return json.dumps(res, ensure_ascii=False, indent=2)
@@ -121,7 +100,7 @@ def process_video(user_id: Optional[int], video_file):
 def process_file(user_id: Optional[int], file_obj):
     try:
-        uid = int(user_id) if user_id not in (None, "", "None") else 0
         w = GradioFileWrapper(file_obj)
         res = run_async(AI.process_file(w, uid))
         return json.dumps(res, ensure_ascii=False, indent=2)
@@ -130,7 +109,7 @@ def process_file(user_id: Optional[int], file_obj):
 def code_complete(user_id: Optional[int], prompt: str, max_tokens: int = 512):
     try:
-        uid = int(user_id) if user_id not in (None, "", "None") else 0
         out = run_async(AI.code_complete(prompt, max_tokens=max_tokens))
         return out
     except Exception as e:
@@ -139,88 +118,72 @@ def code_complete(user_id: Optional[int], prompt: str, max_tokens: int = 512):
 # ------------------------------------------------------------------
 # Gradio UI
 # ------------------------------------------------------------------
-with gr.Blocks(title="Multimodal Bot (uses your multimodal_module.py)") as demo:
-    gr.Markdown("# Multimodal Bot\nThis Space uses the exact `multimodal_module.py` you uploaded. Use the tabs below.")
-    with gr.Tab("Text Chat"):
         with gr.Row():
             user_id_txt = gr.Textbox(label="User ID (optional)", placeholder="0")
             lang_sel = gr.Dropdown(choices=["en","zh","ja","ko","es","fr","de","it"], value="en", label="Language")
-        txt_in = gr.Textbox(label="User text", lines=4)
-        txt_out = gr.Textbox(label="Reply", lines=6)
-        txt_btn = gr.Button("Send")
-        txt_btn.click(fn=text_chat, inputs=[user_id_txt, txt_in, lang_sel], outputs=txt_out)
-    with gr.Tab("Voice (transcribe + emotion)"):
-        with gr.Row():
-            user_id_voice = gr.Textbox(label="User ID (optional)", placeholder="0")
-        voice_in = gr.Audio(source="upload", type="filepath", label="Upload voice (.ogg/.wav)")
         voice_out = gr.Textbox(label="Result JSON")
-        voice_btn = gr.Button("Process Voice")
-        voice_btn.click(fn=voice_process, inputs=[user_id_voice, voice_in], outputs=voice_out)
-    with gr.Tab("Voice Reply (TTS)"):
-        with gr.Row():
-            user_id_vr = gr.Textbox(label="User ID (optional)", placeholder="0")
-        vr_text = gr.Textbox(label="Text to convert to voice", lines=4)
         vr_fmt = gr.Dropdown(choices=["ogg","wav","mp3"], value="ogg", label="Format")
         vr_audio = gr.Audio(label="Generated Voice")
-        vr_btn = gr.Button("Generate Voice")
-        vr_btn.click(fn=generate_voice, inputs=[user_id_vr, vr_text, vr_fmt], outputs=vr_audio)
-    with gr.Tab("Image Caption"):
-        with gr.Row():
-            user_id_img = gr.Textbox(label="User ID (optional)", placeholder="0")
         img_in = gr.Image(type="filepath", label="Upload Image")
         img_out = gr.Textbox(label="Caption")
-        img_btn = gr.Button("Caption Image")
-        img_btn.click(fn=image_caption, inputs=[user_id_img, img_in], outputs=img_out)
-    with gr.Tab("Image Generate"):
-        with gr.Row():
-            user_id_gi = gr.Textbox(label="User ID (optional)", placeholder="0")
         prompt_in = gr.Textbox(label="Prompt", lines=3)
-        width = gr.Slider(minimum=256, maximum=1024, step=64, value=512, label="Width")
-        height = gr.Slider(minimum=256, maximum=1024, step=64, value=512, label="Height")
-        steps = gr.Slider(minimum=10, maximum=50, step=5, value=30, label="Steps")
         gen_out = gr.Image(type="filepath", label="Generated image")
-        gen_btn = gr.Button("Generate")
-        gen_btn.click(fn=generate_image, inputs=[user_id_gi, prompt_in, width, height, steps], outputs=gen_out)
-    with gr.Tab("Image Edit (Inpaint)"):
-        with gr.Row():
-            user_id_ie = gr.Textbox(label="User ID (optional)", placeholder="0")
         edit_img = gr.Image(type="filepath", label="Image to edit")
-        edit_mask = gr.Image(type="filepath", label="Mask (white=edit black=keep) (optional)")
-        edit_prompt = gr.Textbox(label="Prompt (what to paint)", lines=2)
         edit_out = gr.Image(type="filepath", label="Edited image")
-        edit_btn = gr.Button("Edit Image")
-        edit_btn.click(fn=edit_image, inputs=[user_id_ie, edit_img, edit_mask, edit_prompt], outputs=edit_out)
-    with gr.Tab("Video"):
-        with gr.Row():
-            user_id_vid = gr.Textbox(label="User ID (optional)", placeholder="0")
         vid_in = gr.Video(label="Upload video")
         vid_out = gr.Textbox(label="Result JSON")
-        vid_btn = gr.Button("Process Video")
-        vid_btn.click(fn=process_video, inputs=[user_id_vid, vid_in], outputs=vid_out)
-    with gr.Tab("Files (PDF/DOCX/TXT)"):
-        with gr.Row():
-            user_id_file = gr.Textbox(label="User ID (optional)", placeholder="0")
         file_in = gr.File(label="Upload file")
         file_out = gr.Textbox(label="Result JSON")
-        file_btn = gr.Button("Process File")
-        file_btn.click(fn=process_file, inputs=[user_id_file, file_in], outputs=file_out)
-    with gr.Tab("Code (StarCoder)"):
-        with gr.Row():
-            user_id_code = gr.Textbox(label="User ID (optional)", placeholder="0")
-        code_prompt = gr.Textbox(label="Prompt for code generation", lines=6)
         code_out = gr.Textbox(label="Generated code", lines=12)
-        code_btn = gr.Button("Generate Code")
-        code_btn.click(fn=code_complete, inputs=[user_id_code, code_prompt], outputs=code_out)
-    gr.Markdown("----\nYour underlying `multimodal_module.py` is used exactly as uploaded. Be patient on first calls — heavy models load lazily.")
-# Launch
 demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))

 from typing import Optional
 import gradio as gr
 from multimodal_module import MultiModalChatModule
+# Instantiate AI
 AI = MultiModalChatModule()
 # ------------------------------------------------------------------
+# File wrapper to adapt Gradio uploads to your module
 # ------------------------------------------------------------------
 class GradioFileWrapper:
     """Adapt a Gradio upload to an object exposing ``download_to_drive``.

     The upload is resolved to a local filesystem path once, at construction
     time; ``download_to_drive`` later copies that file to a destination.
     """

     def __init__(self, gr_file):
         """Resolve *gr_file* to a local path stored in ``self._path``.

         Args:
             gr_file: A path string, an ``os.PathLike`` object, an object with
                 a ``name`` attribute, or a mapping with a ``"name"`` (or
                 ``"path"``) entry.

         Raises:
             ValueError: If no path can be extracted from *gr_file*.
         """
         if isinstance(gr_file, str):
             self._path = gr_file
             return
         # Check PathLike before ``.name``: ``pathlib.Path.name`` is only the
         # final path component, which would silently drop the directory.
         if isinstance(gr_file, os.PathLike):
             self._path = os.fspath(gr_file)
             return
         try:
             self._path = gr_file.name
             return
         except AttributeError:
             pass
         # "path" is accepted in addition to "name" -- presumably the key used
         # by newer Gradio payloads; confirm against the installed version.
         for key in ("name", "path"):
             try:
                 self._path = gr_file[key]
                 return
             except (KeyError, IndexError, TypeError):
                 continue
         raise ValueError("Unsupported file object from Gradio")

     async def download_to_drive(self, dst_path: str) -> None:
         """Copy the wrapped file to *dst_path* using the default executor.

         The copy runs via ``run_in_executor`` so the blocking
         ``shutil.copyfile`` call does not execute on the event loop thread.
         """
         # get_running_loop() is the supported way to obtain the loop from
         # inside a coroutine.
         loop = asyncio.get_running_loop()
         await loop.run_in_executor(None, shutil.copyfile, self._path, dst_path)
 def run_async(coro):
     """Run *coro* to completion with ``asyncio.run`` and return its result."""
     outcome = asyncio.run(coro)
     return outcome
 # ------------------------------------------------------------------
+# Callback functions
 # ------------------------------------------------------------------
 def text_chat(user_id: Optional[int], text: str, lang: str = "en"):
     try:
+        uid = int(user_id) if user_id else 0
         reply = run_async(AI.generate_response(text, uid, lang))
         return reply
     except Exception as e:
 def voice_process(user_id: Optional[int], audio_file):
     """Send an uploaded audio file to the AI module and return the result as JSON text.

     Args:
         user_id: Value from the "User ID" box; any falsy value is treated as 0.
         audio_file: The upload as delivered by Gradio, wrapped in
             ``GradioFileWrapper`` before being passed on.

     Returns:
         The result of ``AI.process_voice_message`` serialized with
         ``json.dumps`` (indent 2, non-ASCII preserved), or an
         ``"Error: ..."`` string if any step raises.
     """
     try:
         if user_id:
             uid = int(user_id)
         else:
             uid = 0
         upload = GradioFileWrapper(audio_file)
         pending = AI.process_voice_message(upload, uid)
         outcome = run_async(pending)
         return json.dumps(outcome, ensure_ascii=False, indent=2)
     except Exception as err:
         return f"Error: {err}"
 def generate_voice(user_id: Optional[int], reply_text: str, fmt: str = "ogg"):
     """Convert *reply_text* to speech through the AI module.

     Args:
         user_id: Value from the "User ID" box; any falsy value is treated as 0.
         reply_text: Text to synthesize.
         fmt: Audio format forwarded to ``AI.generate_voice_reply``.

     Returns:
         Whatever ``AI.generate_voice_reply`` returns; the UI uses it as the
         value of a single audio output.

     Raises:
         gr.Error: If parsing the user id or generating the audio fails.
     """
     try:
         uid = int(user_id) if user_id else 0
         return run_async(AI.generate_voice_reply(reply_text, uid, fmt))
     except Exception as e:
         # Returning a ``(None, message)`` pair here would hand two values to
         # a callback wired to one output component. Raising gr.Error lets
         # Gradio display the message instead.
         raise gr.Error(f"Error: {e}") from e
 def image_caption(user_id: Optional[int], image_file):
     try:
+        uid = int(user_id) if user_id else 0
         wrapper = GradioFileWrapper(image_file)
         caption = run_async(AI.process_image_message(wrapper, uid))
         return caption
 def generate_image(user_id: Optional[int], prompt: str, width: int = 512, height: int = 512, steps: int = 30):
     try:
+        uid = int(user_id) if user_id else 0
         path = run_async(AI.generate_image_from_text(prompt, uid, width=width, height=height, steps=steps))
         return path
     except Exception as e:
 def edit_image(user_id: Optional[int], image_file, mask_file, prompt: str = ""):
     try:
+        uid = int(user_id) if user_id else 0
         img_w = GradioFileWrapper(image_file)
+        mask_w = GradioFileWrapper(mask_file) if mask_file else None
         path = run_async(AI.edit_image_inpaint(img_w, mask_w, prompt, uid))
         return path
     except Exception as e:
 def process_video(user_id: Optional[int], video_file):
     try:
+        uid = int(user_id) if user_id else 0
         wrapper = GradioFileWrapper(video_file)
         res = run_async(AI.process_video(wrapper, uid))
         return json.dumps(res, ensure_ascii=False, indent=2)
 def process_file(user_id: Optional[int], file_obj):
     try:
+        uid = int(user_id) if user_id else 0
         w = GradioFileWrapper(file_obj)
         res = run_async(AI.process_file(w, uid))
         return json.dumps(res, ensure_ascii=False, indent=2)
 def code_complete(user_id: Optional[int], prompt: str, max_tokens: int = 512):
     try:
+        uid = int(user_id) if user_id else 0
         out = run_async(AI.code_complete(prompt, max_tokens=max_tokens))
         return out
     except Exception as e:
 # ------------------------------------------------------------------
 # Gradio UI
 # ------------------------------------------------------------------
+with gr.Blocks(title="Multimodal Bot (Gradio)") as demo:
     # One tab per capability. Every tab has its own "User ID" textbox and a
     # button whose click handler is one of the callback functions above.
+    gr.Markdown("# 🧠 Multimodal Bot\nInteract via text, voice, images, video, or files.")
+    with gr.Tab("💬 Text Chat"):
         with gr.Row():
             user_id_txt = gr.Textbox(label="User ID (optional)", placeholder="0")
             lang_sel = gr.Dropdown(choices=["en","zh","ja","ko","es","fr","de","it"], value="en", label="Language")
+        txt_in = gr.Textbox(label="Your message", lines=4)
+        txt_out = gr.Textbox(label="Bot reply", lines=6)
+        gr.Button("Send").click(text_chat, [user_id_txt, txt_in, lang_sel], txt_out)
+    with gr.Tab("🎤 Voice (Transcribe + Emotion)"):
+        user_id_voice = gr.Textbox(label="User ID (optional)", placeholder="0")
         # type="filepath": the callback receives the audio as a path.
+        voice_in = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Record or upload voice (.ogg/.wav)")
         voice_out = gr.Textbox(label="Result JSON")
+        gr.Button("Process Voice").click(voice_process, [user_id_voice, voice_in], voice_out)
+    with gr.Tab("🔊 Voice Reply (TTS)"):
+        user_id_vr = gr.Textbox(label="User ID (optional)", placeholder="0")
+        vr_text = gr.Textbox(label="Text to speak", lines=4)
         vr_fmt = gr.Dropdown(choices=["ogg","wav","mp3"], value="ogg", label="Format")
         vr_audio = gr.Audio(label="Generated Voice")
         # generate_voice is bound to exactly one output component (vr_audio).
+        gr.Button("Generate Voice").click(generate_voice, [user_id_vr, vr_text, vr_fmt], vr_audio)
+    with gr.Tab("🖼️ Image Caption"):
+        user_id_img = gr.Textbox(label="User ID (optional)", placeholder="0")
         img_in = gr.Image(type="filepath", label="Upload Image")
         img_out = gr.Textbox(label="Caption")
+        gr.Button("Caption Image").click(image_caption, [user_id_img, img_in], img_out)
+    with gr.Tab("🎨 Image Generate"):
+        user_id_gi = gr.Textbox(label="User ID (optional)", placeholder="0")
         prompt_in = gr.Textbox(label="Prompt", lines=3)
         # Slider positional arguments are (minimum, maximum, value).
+        width = gr.Slider(256, 1024, 512, step=64, label="Width")
+        height = gr.Slider(256, 1024, 512, step=64, label="Height")
+        steps = gr.Slider(10, 50, 30, step=5, label="Steps")
         gen_out = gr.Image(type="filepath", label="Generated image")
+        gr.Button("Generate").click(generate_image, [user_id_gi, prompt_in, width, height, steps], gen_out)
+    with gr.Tab("✏️ Image Edit (Inpaint)"):
+        user_id_ie = gr.Textbox(label="User ID (optional)", placeholder="0")
         edit_img = gr.Image(type="filepath", label="Image to edit")
+        edit_mask = gr.Image(type="filepath", label="Mask (optional)")
+        edit_prompt = gr.Textbox(label="Prompt", lines=2)
         edit_out = gr.Image(type="filepath", label="Edited image")
+        gr.Button("Edit Image").click(edit_image, [user_id_ie, edit_img, edit_mask, edit_prompt], edit_out)
+    with gr.Tab("🎥 Video"):
+        user_id_vid = gr.Textbox(label="User ID (optional)", placeholder="0")
         vid_in = gr.Video(label="Upload video")
         vid_out = gr.Textbox(label="Result JSON")
+        gr.Button("Process Video").click(process_video, [user_id_vid, vid_in], vid_out)
+    with gr.Tab("📄 Files (PDF/DOCX/TXT)"):
+        user_id_file = gr.Textbox(label="User ID (optional)", placeholder="0")
         file_in = gr.File(label="Upload file")
         file_out = gr.Textbox(label="Result JSON")
+        gr.Button("Process File").click(process_file, [user_id_file, file_in], file_out)
+    with gr.Tab("💻 Code Generation"):
+        user_id_code = gr.Textbox(label="User ID (optional)", placeholder="0")
+        code_prompt = gr.Textbox(label="Code prompt", lines=6)
         code_out = gr.Textbox(label="Generated code", lines=12)
         # Only the user id and prompt are wired in, so code_complete runs with
         # its default max_tokens.
+        gr.Button("Generate Code").click(code_complete, [user_id_code, code_prompt], code_out)
+    gr.Markdown("----\nThis Space runs your exact `multimodal_module.py`. First requests may take longer due to model loading.")
+# Launch app
 demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))