Princeaka committed on
Commit
1de5d77
·
verified ·
1 Parent(s): a657484

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +145 -199
app.py CHANGED
@@ -1,242 +1,188 @@
1
- # app.py - Hybrid Gradio + FastAPI wrapper for multimodal_module.py
2
  import os
3
  import shutil
4
  import asyncio
5
- import json
6
  from typing import Optional
7
-
 
8
  import gradio as gr
9
- from fastapi import FastAPI, Request
 
 
10
  from multimodal_module import MultiModalChatModule
11
 
12
- # Instantiate AI
13
  AI = MultiModalChatModule()
14
 
15
- # ============================================================
16
- # Helper: File wrapper for Gradio uploads
17
- # ============================================================
18
- class GradioFileWrapper:
19
- def __init__(self, gr_file):
20
- if isinstance(gr_file, str):
21
- self._path = gr_file
22
- else:
23
- try:
24
- self._path = gr_file.name
25
- except Exception:
26
- try:
27
- self._path = gr_file["name"]
28
- except Exception:
29
- raise ValueError("Unsupported file object from Gradio")
30
 
31
  async def download_to_drive(self, dst_path: str) -> None:
 
32
  loop = asyncio.get_event_loop()
33
  await loop.run_in_executor(None, shutil.copyfile, self._path, dst_path)
34
 
35
-
36
- # ============================================================
37
- # Async-safe helper
38
- # ============================================================
39
- def run_async(coro):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  try:
41
- loop = asyncio.get_running_loop()
42
- except RuntimeError:
43
- loop = asyncio.new_event_loop()
44
- asyncio.set_event_loop(loop)
45
- loop = asyncio.get_event_loop()
46
- return loop.run_until_complete(coro)
47
-
48
-
49
- # ============================================================
50
- # Callback functions (used by Gradio & API)
51
- # ============================================================
52
- def text_chat(user_id: Optional[int], text: str, lang: str = "en"):
53
- try:
54
- uid = int(user_id) if user_id else 0
55
- reply = run_async(AI.generate_response(text, uid, lang))
56
- return reply
57
  except Exception as e:
58
- return f"Error: {e}"
59
-
60
 
61
- def voice_process(user_id: Optional[int], audio_file):
 
62
  try:
63
- uid = int(user_id) if user_id else 0
64
- wrapper = GradioFileWrapper(audio_file)
65
- result = run_async(AI.process_voice_message(wrapper, uid))
66
- return json.dumps(result, ensure_ascii=False, indent=2)
67
  except Exception as e:
68
- return f"Error: {e}"
69
-
70
 
71
- def generate_voice(user_id: Optional[int], reply_text: str, fmt: str = "ogg"):
 
 
 
 
72
  try:
73
- uid = int(user_id) if user_id else 0
74
- path = run_async(AI.generate_voice_reply(reply_text, uid, fmt))
75
- return path
76
  except Exception as e:
77
- return None, f"Error: {e}"
78
-
79
 
80
- def image_caption(user_id: Optional[int], image_file):
 
81
  try:
82
- uid = int(user_id) if user_id else 0
83
- wrapper = GradioFileWrapper(image_file)
84
- caption = run_async(AI.process_image_message(wrapper, uid))
85
- return caption
86
  except Exception as e:
87
- return f"Error: {e}"
88
 
89
-
90
- def generate_image(user_id: Optional[int], prompt: str, width: int = 512, height: int = 512, steps: int = 30):
91
  try:
92
- uid = int(user_id) if user_id else 0
93
- path = run_async(AI.generate_image_from_text(prompt, uid, width=width, height=height, steps=steps))
94
- return path
95
  except Exception as e:
96
- return f"Error: {e}"
97
-
98
 
99
- def edit_image(user_id: Optional[int], image_file, mask_file, prompt: str = ""):
 
100
  try:
101
- uid = int(user_id) if user_id else 0
102
- img_w = GradioFileWrapper(image_file)
103
- mask_w = GradioFileWrapper(mask_file) if mask_file else None
104
- path = run_async(AI.edit_image_inpaint(img_w, mask_w, prompt, uid))
105
- return path
106
  except Exception as e:
107
- return f"Error: {e}"
108
-
109
 
110
- def process_video(user_id: Optional[int], video_file):
 
111
  try:
112
- uid = int(user_id) if user_id else 0
113
- wrapper = GradioFileWrapper(video_file)
114
- res = run_async(AI.process_video(wrapper, uid))
115
- return json.dumps(res, ensure_ascii=False, indent=2)
 
 
116
  except Exception as e:
117
- return f"Error: {e}"
118
 
119
-
120
- def process_file(user_id: Optional[int], file_obj):
121
  try:
122
- uid = int(user_id) if user_id else 0
123
- w = GradioFileWrapper(file_obj)
124
- res = run_async(AI.process_file(w, uid))
125
- return json.dumps(res, ensure_ascii=False, indent=2)
126
  except Exception as e:
127
- return f"Error: {e}"
128
-
129
 
130
- def code_complete(user_id: Optional[int], prompt: str, max_tokens: int = 512):
 
131
  try:
132
- uid = int(user_id) if user_id else 0
133
- out = run_async(AI.code_complete(prompt, max_tokens=max_tokens))
134
- return out
135
  except Exception as e:
136
- return f"Error: {e}"
137
-
138
 
139
- # ============================================================
140
- # FastAPI public API
141
- # ============================================================
142
- api = FastAPI()
143
-
144
- @api.post("/api/predict")
145
- async def api_predict(request: Request):
146
  try:
147
- data = await request.json()
148
- user_id = data.get("user_id", 0)
149
- text = data.get("text", "")
150
- lang = data.get("lang", "en")
151
- reply = text_chat(user_id, text, lang)
152
- return {"status": "ok", "reply": reply}
153
  except Exception as e:
154
- return {"status": "error", "message": str(e)}
155
-
156
-
157
- # ============================================================
158
- # Gradio UI
159
- # ============================================================
160
- with gr.Blocks(title="Multimodal Bot (Gradio)") as demo:
161
- gr.Markdown("# 🧠 Multimodal Bot\nInteract via text, voice, images, video, or files.")
162
-
163
- with gr.Tab("💬 Text Chat"):
164
- with gr.Row():
165
- user_id_txt = gr.Textbox(label="User ID (optional)", placeholder="0")
166
- lang_sel = gr.Dropdown(choices=["en","zh","ja","ko","es","fr","de","it"], value="en", label="Language")
167
- txt_in = gr.Textbox(label="Your message", lines=4)
168
- txt_out = gr.Textbox(label="Bot reply", lines=6)
169
- gr.Button("Send").click(text_chat, [user_id_txt, txt_in, lang_sel], txt_out)
170
-
171
- with gr.Tab("🎤 Voice (Transcribe + Emotion)"):
172
- user_id_voice = gr.Textbox(label="User ID (optional)", placeholder="0")
173
- voice_in = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Record or upload voice (.ogg/.wav)")
174
- voice_out = gr.Textbox(label="Result JSON")
175
- gr.Button("Process Voice").click(voice_process, [user_id_voice, voice_in], voice_out)
176
-
177
- with gr.Tab("🔊 Voice Reply (TTS)"):
178
- user_id_vr = gr.Textbox(label="User ID (optional)", placeholder="0")
179
- vr_text = gr.Textbox(label="Text to speak", lines=4)
180
- vr_fmt = gr.Dropdown(choices=["ogg","wav","mp3"], value="ogg", label="Format")
181
- vr_audio = gr.Audio(label="Generated Voice")
182
- gr.Button("Generate Voice").click(generate_voice, [user_id_vr, vr_text, vr_fmt], vr_audio)
183
-
184
- with gr.Tab("🖼️ Image Caption"):
185
- user_id_img = gr.Textbox(label="User ID (optional)", placeholder="0")
186
- img_in = gr.Image(type="filepath", label="Upload Image")
187
- img_out = gr.Textbox(label="Caption")
188
- gr.Button("Caption Image").click(image_caption, [user_id_img, img_in], img_out)
189
-
190
- with gr.Tab("🎨 Image Generate"):
191
- user_id_gi = gr.Textbox(label="User ID (optional)", placeholder="0")
192
- prompt_in = gr.Textbox(label="Prompt", lines=3)
193
- width = gr.Slider(256, 1024, 512, step=64, label="Width")
194
- height = gr.Slider(256, 1024, 512, step=64, label="Height")
195
- steps = gr.Slider(10, 50, 30, step=5, label="Steps")
196
- gen_out = gr.Image(type="filepath", label="Generated image")
197
- gr.Button("Generate").click(generate_image, [user_id_gi, prompt_in, width, height, steps], gen_out)
198
-
199
- with gr.Tab("✏️ Image Edit (Inpaint)"):
200
- user_id_ie = gr.Textbox(label="User ID (optional)", placeholder="0")
201
- edit_img = gr.Image(type="filepath", label="Image to edit")
202
- edit_mask = gr.Image(type="filepath", label="Mask (optional)")
203
- edit_prompt = gr.Textbox(label="Prompt", lines=2)
204
- edit_out = gr.Image(type="filepath", label="Edited image")
205
- gr.Button("Edit Image").click(edit_image, [user_id_ie, edit_img, edit_mask, edit_prompt], edit_out)
206
-
207
- with gr.Tab("🎥 Video"):
208
- user_id_vid = gr.Textbox(label="User ID (optional)", placeholder="0")
209
- vid_in = gr.Video(label="Upload video")
210
- vid_out = gr.Textbox(label="Result JSON")
211
- gr.Button("Process Video").click(process_video, [user_id_vid, vid_in], vid_out)
212
-
213
- with gr.Tab("📄 Files (PDF/DOCX/TXT)"):
214
- user_id_file = gr.Textbox(label="User ID (optional)", placeholder="0")
215
- file_in = gr.File(label="Upload file")
216
- file_out = gr.Textbox(label="Result JSON")
217
- gr.Button("Process File").click(process_file, [user_id_file, file_in], file_out)
218
-
219
- with gr.Tab("💻 Code Generation"):
220
- user_id_code = gr.Textbox(label="User ID (optional)", placeholder="0")
221
- code_prompt = gr.Textbox(label="Code prompt", lines=6)
222
- code_out = gr.Textbox(label="Generated code", lines=12)
223
- gr.Button("Generate Code").click(code_complete, [user_id_code, code_prompt], code_out)
224
-
225
- gr.Markdown("----\nThis Space runs your exact `multimodal_module.py`. First requests may take longer due to model loading.")
226
-
227
-
228
- # ============================================================
229
- # Launch both API + Gradio
230
- # ============================================================
231
- import uvicorn
232
- from threading import Thread
233
-
234
- def start_api():
235
- uvicorn.run(api, host="0.0.0.0", port=8000)
236
-
237
- # Start FastAPI in a separate thread
238
- Thread(target=start_api, daemon=True).start()
239
-
240
- # Launch Gradio
241
- demo.queue()
242
- demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
 
1
+ # app.py -- HF-ready single-server FastAPI + Gradio mounted app
2
  import os
3
  import shutil
4
  import asyncio
5
+ import inspect
6
  from typing import Optional
7
+ from fastapi import FastAPI, UploadFile, File, Form
8
+ from fastapi.responses import JSONResponse
9
  import gradio as gr
10
+ import uvicorn
11
+
12
+ # Import your real multimodal module
13
  from multimodal_module import MultiModalChatModule
14
 
15
# Instantiate your AI module
# NOTE(review): this runs at import time — if MultiModalChatModule loads
# models in __init__, the first import of this file may be slow. Confirm.
AI = MultiModalChatModule()

# ---------- Helpers ----------
# Scratch directory for uploaded files (ephemeral storage on HF Spaces).
TMP_DIR = "/tmp"
os.makedirs(TMP_DIR, exist_ok=True)
21
+
22
class FileWrapper:
    """Thin wrapper around a filesystem path.

    Keeps the same interface the AI module already expects from the old
    GradioFileWrapper: a ``download_to_drive`` coroutine that copies the
    wrapped file to a destination path.
    """

    def __init__(self, path: str):
        self._path = path

    async def download_to_drive(self, dst_path: str) -> None:
        """Copy the wrapped file to *dst_path* without blocking the loop."""
        # Run the blocking copy in the default executor thread pool.
        await asyncio.get_event_loop().run_in_executor(
            None, shutil.copyfile, self._path, dst_path
        )
31
 
32
def save_upload_to_tmp(up, dst_dir: str = "/tmp") -> str:
    """Persist a FastAPI ``UploadFile`` to *dst_dir* and return the saved path.

    Args:
        up: the uploaded file (anything with ``.filename`` and a ``.file``
            binary stream works).
        dst_dir: destination directory; defaults to ``/tmp`` (the module's
            TMP_DIR), created on demand.

    Returns:
        Absolute path of the saved file. An existing file with the same
        name is overwritten.

    Raises:
        ValueError: if the upload is missing or has no usable filename.
    """
    # raise, don't assert: assert statements are stripped under `python -O`
    if up is None or not up.filename:
        raise ValueError("UploadFile missing filename")
    # basename() strips any client-supplied directory components, so a
    # crafted name like "../../etc/passwd" cannot escape dst_dir
    name = os.path.basename(up.filename)
    if not name:
        raise ValueError("UploadFile has an empty filename")
    os.makedirs(dst_dir, exist_ok=True)
    dest = os.path.join(dst_dir, name)
    # stream the body instead of reading the whole upload into memory
    with open(dest, "wb") as f:
        shutil.copyfileobj(up.file, f)
    return dest
40
+
41
async def call_ai(fn, *args, **kwargs):
    """Invoke an AI-module callable without blocking the event loop.

    Async callables are awaited directly; sync callables run in a worker
    thread via ``asyncio.to_thread``.

    Raises:
        TypeError: if *fn* is None — the ``getattr(AI, ..., None)``
            fallbacks at the call sites can hand us None when the module
            lacks the method; fail with a clear message instead of
            "'NoneType' object is not callable".
    """
    if fn is None:
        raise TypeError("AI module does not provide the requested method")
    if inspect.iscoroutinefunction(fn):
        return await fn(*args, **kwargs)
    # to_thread forwards *args/**kwargs itself; no lambda wrapper needed
    return await asyncio.to_thread(fn, *args, **kwargs)
51
+
52
# ---------- FastAPI app ----------
app = FastAPI(title="Multimodal Module API")

# Optional: allow CORS if external web apps will call this
from fastapi.middleware.cors import CORSMiddleware
# NOTE(review): browsers reject a wildcard Access-Control-Allow-Origin
# combined with credentialed requests — if cookies/auth headers are needed,
# list explicit origins instead. Confirm the intended clients.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # change to specific domains for production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
64
+
65
+ # ----------------- API endpoints -----------------
66
+
67
@app.post("/api/predict")
async def api_predict(inputs: str = Form(...), user_id: Optional[int] = Form(0), lang: str = Form("en")):
    """HuggingFace-compatible predict endpoint.

    The form field ``inputs`` carries the user text.
    """
    try:
        handler = getattr(AI, "generate_response", getattr(AI, "process_text", None))
        answer = await call_ai(handler, inputs, int(user_id), lang)
    except Exception as exc:
        return JSONResponse({"error": str(exc)}, status_code=500)
    # HF-style clients expect the result wrapped in a "data" array
    return {"data": [answer]}
 
79
 
80
@app.post("/api/text")
async def api_text(text: str = Form(...), user_id: Optional[int] = Form(0), lang: str = Form("en")):
    """Plain text-chat endpoint; like /api/predict but with named fields."""
    try:
        handler = getattr(AI, "generate_response", getattr(AI, "process_text", None))
        answer = await call_ai(handler, text, int(user_id), lang)
    except Exception as exc:
        return JSONResponse({"error": str(exc)}, status_code=500)
    return {"status": "ok", "reply": answer}
 
87
 
88
@app.post("/api/voice")
async def api_voice(user_id: Optional[int] = Form(0), audio_file: UploadFile = File(...)):
    """Process an uploaded audio file (multipart/form-data).

    Returns whatever ``AI.process_voice_message`` produces (a JSON-able
    dict), or an ``{"error": ...}`` payload with HTTP 500 on failure.
    """
    try:
        stored = save_upload_to_tmp(audio_file)
        outcome = await call_ai(getattr(AI, "process_voice_message", None), FileWrapper(stored), int(user_id))
        return JSONResponse(outcome)
    except Exception as exc:
        return JSONResponse({"error": str(exc)}, status_code=500)
 
99
 
100
@app.post("/api/voice_reply")
async def api_voice_reply(user_id: Optional[int] = Form(0), reply_text: str = Form(...), fmt: str = Form("ogg")):
    """Synthesize speech for *reply_text*; returns the generated file path."""
    try:
        voice_path = await call_ai(getattr(AI, "generate_voice_reply", None), reply_text, int(user_id), fmt)
    except Exception as exc:
        return JSONResponse({"error": str(exc)}, status_code=500)
    return {"status": "ok", "file": voice_path}
107
 
108
@app.post("/api/image_caption")
async def api_image_caption(user_id: Optional[int] = Form(0), image_file: UploadFile = File(...)):
    """Caption an uploaded image via ``AI.process_image_message``."""
    try:
        stored = save_upload_to_tmp(image_file)
        text = await call_ai(getattr(AI, "process_image_message", None), FileWrapper(stored), int(user_id))
    except Exception as exc:
        return JSONResponse({"error": str(exc)}, status_code=500)
    return {"status": "ok", "caption": text}
 
116
 
117
@app.post("/api/generate_image")
async def api_generate_image(user_id: Optional[int] = Form(0), prompt: str = Form(...), width: int = Form(512), height: int = Form(512), steps: int = Form(30)):
    """Generate an image from a text prompt; returns the output file path.

    ``width``/``height``/``steps`` are passed by keyword: the module API is
    ``generate_image_from_text(prompt, uid, width=..., height=..., steps=...)``
    (as called elsewhere in this project), so positional passing risks
    binding them to the wrong parameters.
    """
    try:
        out_path = await call_ai(
            getattr(AI, "generate_image_from_text", None),
            prompt,
            int(user_id),
            width=width,
            height=height,
            steps=steps,
        )
        return {"status": "ok", "file": out_path}
    except Exception as e:
        return JSONResponse({"error": str(e)}, status_code=500)
 
124
 
125
@app.post("/api/edit_image")
async def api_edit_image(user_id: Optional[int] = Form(0), image_file: UploadFile = File(...), mask_file: Optional[UploadFile] = File(None), prompt: str = Form("")):
    """Inpaint-edit an uploaded image; the mask upload is optional."""
    try:
        image_wrapper = FileWrapper(save_upload_to_tmp(image_file))
        mask_wrapper = FileWrapper(save_upload_to_tmp(mask_file)) if mask_file else None
        edited = await call_ai(getattr(AI, "edit_image_inpaint", None), image_wrapper, mask_wrapper, prompt, int(user_id))
    except Exception as exc:
        return JSONResponse({"error": str(exc)}, status_code=500)
    return {"status": "ok", "file": edited}
136
 
137
@app.post("/api/video")
async def api_video(user_id: Optional[int] = Form(0), video_file: UploadFile = File(...)):
    """Analyze an uploaded video; returns the module's JSON-able result."""
    try:
        stored = save_upload_to_tmp(video_file)
        outcome = await call_ai(getattr(AI, "process_video", None), FileWrapper(stored), int(user_id))
        return JSONResponse(outcome)
    except Exception as exc:
        return JSONResponse({"error": str(exc)}, status_code=500)
 
145
 
146
@app.post("/api/file")
async def api_file(user_id: Optional[int] = Form(0), file_obj: UploadFile = File(...)):
    """Process an uploaded document (PDF/DOCX/TXT per the UI's old labels)."""
    try:
        stored = save_upload_to_tmp(file_obj)
        outcome = await call_ai(getattr(AI, "process_file", None), FileWrapper(stored), int(user_id))
        return JSONResponse(outcome)
    except Exception as exc:
        return JSONResponse({"error": str(exc)}, status_code=500)
 
154
 
155
@app.post("/api/code")
async def api_code(user_id: Optional[int] = Form(0), prompt: str = Form(...), max_tokens: int = Form(512)):
    """Code-completion endpoint.

    Calls ``AI.code_complete(prompt, max_tokens=...)`` — the signature used
    elsewhere in this project. The previous call here passed ``int(user_id)``
    as the first positional argument, so the user id (not the prompt) was
    sent to the model.
    """
    try:
        result = await call_ai(getattr(AI, "code_complete", None), prompt, max_tokens=max_tokens)
        # Some modules return string/code, others dict — pass through as-is:
        return {"status": "ok", "code": result}
    except Exception as e:
        return JSONResponse({"error": str(e)}, status_code=500)
163
+
164
+ # ---------- Minimal Gradio UI (mounted) ----------
165
def gradio_text_fn(text, user_id, lang):
    """Gradio click handler: route the message to the AI text method.

    Gradio runs handlers in worker threads, so blocking here (including
    ``asyncio.run``) is safe.
    """
    # Resolve the handler once instead of repeating the nested getattr.
    handler = getattr(AI, "generate_response", getattr(AI, "process_text", None))
    uid = int(user_id or 0)
    if inspect.iscoroutinefunction(handler):
        # async implementation: drive it to completion on a private loop
        return asyncio.run(call_ai(handler, text, uid, lang))
    # sync implementation: call directly
    return handler(text, uid, lang)
172
+
173
# Minimal mounted UI: a single text-chat panel backed by gradio_text_fn.
with gr.Blocks(title="Multimodal Bot (UI)") as demo:
    gr.Markdown("# 🧠 Multimodal Bot — UI")
    with gr.Row():
        txt_uid = gr.Textbox(label="User ID", value="0")
        txt_lang = gr.Dropdown(["en","zh","ja","ko","es","fr","de","it"], value="en", label="Language")
    inp = gr.Textbox(lines=3, label="Message")
    out = gr.Textbox(lines=6, label="Reply")
    # Inputs are (message, user id, language) — matching gradio_text_fn's signature.
    gr.Button("Send").click(gradio_text_fn, [inp, txt_uid, txt_lang], out)
181
+
182
# Mount Gradio app at root; the /api/* routes registered above keep working.
app = gr.mount_gradio_app(app, demo, path="/")

# ---------- Run server (HF Spaces uses this entrypoint) ----------
if __name__ == "__main__":
    # HF Spaces provides PORT; fall back to Gradio's conventional 7860.
    port = int(os.environ.get("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=port)