Spaces:

Princeaka
/

multimodal_module

Running

App Files Files Community

Princeaka committed about 1 month ago

Commit

02f51d7

verified ·

1 Parent(s): b6444d6

Update app.py

Browse files

Files changed (1) hide show

app.py +188 -164

app.py CHANGED Viewed

@@ -1,170 +1,194 @@
-# app.py — FastAPI + Gradio (External API + UI)
 import os
-os.environ["CUDA_VISIBLE_DEVICES"] = ""  # Disable GPU
-os.environ["MPLBACKEND"] = "Agg"          # Non-interactive matplotlib
-os.environ["IMAGEIO_FFMPEG_EXE"] = "/usr/bin/ffmpeg"  # Explicit path
-import shutil
 import asyncio
-import inspect
-from typing import Optional
-from fastapi import FastAPI, UploadFile, File, Form
-from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import JSONResponse
 import gradio as gr
 from multimodal_module import MultiModalChatModule
-# Instantiate AI module
-AI = MultiModalChatModule()
-TMP_DIR = "/tmp"
-os.makedirs(TMP_DIR, exist_ok=True)
-# --- File wrapper ---
-class FileWrapper:
-    def __init__(self, path: str):
-        self._path = path
-    async def download_to_drive(self, dst_path: str):
-        loop = asyncio.get_event_loop()
-        await loop.run_in_executor(None, shutil.copyfile, self._path, dst_path)
-# --- Save uploaded file ---
-async def save_upload(up: UploadFile) -> str:
-    if not up or not up.filename:
-        raise ValueError("No file uploaded")
-    dest = os.path.join(TMP_DIR, up.filename)
-    data = await up.read()
-    with open(dest, "wb") as f:
-        f.write(data)
-    return dest
-# --- Call AI (sync or async) ---
-async def call_ai(fn, *args, **kwargs):
-    if fn is None:
-        raise AttributeError("Requested AI method not implemented")
-    if inspect.iscoroutinefunction(fn):
-        return await fn(*args, **kwargs)
-    return await asyncio.to_thread(lambda: fn(*args, **kwargs))
-# === FASTAPI APP ===
-app = FastAPI(title="Multimodal API")
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],  # change for production
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-# --- API Endpoints ---
-@app.post("/api/text")
-async def api_text(text: str = Form(...), user_id: Optional[int] = Form(0), lang: str = Form("en")):
-    try:
-        fn = getattr(AI, "generate_response", getattr(AI, "process_text", None))
-        reply = await call_ai(fn, text, int(user_id), lang)
-        return {"status": "ok", "reply": reply}
-    except Exception as e:
-        return JSONResponse({"error": str(e)}, status_code=500)
-@app.post("/api/voice")
-async def api_voice(user_id: Optional[int] = Form(0), audio_file: UploadFile = File(...)):
-    try:
-        path = await save_upload(audio_file)
-        fn = getattr(AI, "process_voice_message", None)
-        result = await call_ai(fn, FileWrapper(path), int(user_id))
-        return {"status": "ok", "result": result}
-    except Exception as e:
-        return JSONResponse({"error": str(e)}, status_code=500)
-@app.post("/api/voice_reply")
-async def api_voice_reply(user_id: Optional[int] = Form(0), reply_text: str = Form(...), fmt: str = Form("ogg")):
-    try:
-        fn = getattr(AI, "generate_voice_reply", None)
-        result = await call_ai(fn, reply_text, int(user_id), fmt)
-        return {"status": "ok", "file": result}
-    except Exception as e:
-        return JSONResponse({"error": str(e)}, status_code=500)
-@app.post("/api/image_caption")
-async def api_image_caption(user_id: Optional[int] = Form(0), image_file: UploadFile = File(...)):
-    try:
-        path = await save_upload(image_file)
-        fn = getattr(AI, "process_image_message", None)
-        caption = await call_ai(fn, FileWrapper(path), int(user_id))
-        return {"status": "ok", "caption": caption}
-    except Exception as e:
-        return JSONResponse({"error": str(e)}, status_code=500)
-@app.post("/api/generate_image")
-async def api_generate_image(user_id: Optional[int] = Form(0), prompt: str = Form(...), width: int = Form(512), height: int = Form(512), steps: int = Form(30)):
-    try:
-        fn = getattr(AI, "generate_image_from_text", None)
-        out_path = await call_ai(fn, prompt, int(user_id), width, height, steps)
-        return {"status": "ok", "file": out_path}
-    except Exception as e:
-        return JSONResponse({"error": str(e)}, status_code=500)
-@app.post("/api/edit_image")
-async def api_edit_image(user_id: Optional[int] = Form(0), image_file: UploadFile = File(...), mask_file: Optional[UploadFile] = File(None), prompt: str = Form("")):
-    try:
-        img_path = await save_upload(image_file)
-        mask_path = None
-        if mask_file:
-            mask_path = await save_upload(mask_file)
-        fn = getattr(AI, "edit_image_inpaint", None)
-        out_path = await call_ai(fn, FileWrapper(img_path), FileWrapper(mask_path) if mask_path else None, prompt, int(user_id))
-        return {"status": "ok", "file": out_path}
-    except Exception as e:
-        return JSONResponse({"error": str(e)}, status_code=500)
-@app.post("/api/video")
-async def api_video(user_id: Optional[int] = Form(0), video_file: UploadFile = File(...)):
-    try:
-        path = await save_upload(video_file)
-        fn = getattr(AI, "process_video", None)
-        result = await call_ai(fn, FileWrapper(path), int(user_id))
-        return {"status": "ok", "result": result}
-    except Exception as e:
-        return JSONResponse({"error": str(e)}, status_code=500)
-@app.post("/api/file")
-async def api_file(user_id: Optional[int] = Form(0), file_obj: UploadFile = File(...)):
-    try:
-        path = await save_upload(file_obj)
-        fn = getattr(AI, "process_file", None)
-        result = await call_ai(fn, FileWrapper(path), int(user_id))
-        return {"status": "ok", "result": result}
-    except Exception as e:
-        return JSONResponse({"error": str(e)}, status_code=500)
-@app.post("/api/code")
-async def api_code(user_id: Optional[int] = Form(0), prompt: str = Form(...), max_tokens: int = Form(512)):
-    try:
-        fn = getattr(AI, "code_complete", None)
-        try:
-            result = await call_ai(fn, int(user_id), prompt, max_tokens)
-        except TypeError:
-            result = await call_ai(fn, prompt, max_tokens=max_tokens)
-        return {"status": "ok", "code": result}
-    except Exception as e:
-        return JSONResponse({"error": str(e)}, status_code=500)
-# === GRADIO UI ===
-def gradio_text_fn(text, user_id, lang):
-    fn = getattr(AI, "generate_response", getattr(AI, "process_text", None))
-    loop = asyncio.get_event_loop()
-    return loop.run_until_complete(call_ai(fn, text, int(user_id or 0), lang))
-with gr.Blocks(title="Multimodal Bot") as demo:
-    gr.Markdown("# 🧠 Multimodal Bot — UI")
     with gr.Row():
-        uid = gr.Textbox(label="User ID", value="0")
-        lang = gr.Dropdown(["en", "zh", "ja", "ko", "es", "fr", "de", "it"], value="en", label="Language")
-    inp = gr.Textbox(lines=3, label="Message")
-    out = gr.Textbox(lines=6, label="Reply")
-    gr.Button("Send").click(gradio_text_fn, [inp, uid, lang], out)
-# Mount Gradio under /ui
-app = gr.mount_gradio_app(app, demo, path="/ui")

 import os
 import asyncio
+import shutil
 import gradio as gr
 from multimodal_module import MultiModalChatModule
+# Optional: keep model cache persistent across restarts
+# Creates ./model_cache relative to the working directory (no-op if present).
+# NOTE(review): nothing in this file points a library at this directory;
+# presumably multimodal_module uses it as its cache location — confirm.
+os.makedirs("model_cache", exist_ok=True)
+# setdefault() only supplies a value when the variable is not already set,
+# so settings configured in the environment take precedence.
+os.environ.setdefault("HF_HUB_DISABLE_TELEMETRY", "1")
+# NOTE(review): this flag presumably needs the optional `hf_transfer`
+# package to be installed — confirm it is listed in the requirements.
+os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")
+os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
+# Module-level instance, created after the environment defaults are in place.
+mm = MultiModalChatModule()
+# --- Small wrapper so Gradio file paths work with your module's .download_to_drive API ---
+class _GradioFile:
+    """Adapter that exposes a local file path via ``download_to_drive``.
+
+    Gradio passes handlers plain filesystem paths; this wrapper gives such a
+    path an awaitable ``download_to_drive(dest)`` method.
+    """
+
+    def __init__(self, path: str | None):
+        # Source path on local disk; may be None when nothing was uploaded.
+        self.path = path
+
+    async def download_to_drive(self, dest: str):
+        """Copy the wrapped file to ``dest``.
+
+        Args:
+            dest: Destination file or directory path.
+
+        Raises:
+            ValueError: If no source path was provided.
+        """
+        if not self.path:
+            raise ValueError("No file path provided.")
+        # shutil.copy blocks; run it in a worker thread so a large upload
+        # does not stall the event loop while it is being copied.
+        await asyncio.to_thread(shutil.copy, self.path, dest)
+# -------------------------
+# TEXT CHAT
+# -------------------------
+async def chat_fn(user_id: str, message: str, lang: str):
+    """Return the chat module's reply to ``message``.
+
+    An empty ``user_id`` falls back to 1, an empty message to ``""`` and an
+    empty language code to ``"en"``; the language code is stripped of
+    surrounding whitespace before use.
+    """
+    numeric_id = int(user_id) if user_id else 1
+    text = message if message else ""
+    language_code = (lang if lang else "en").strip()
+    reply = await mm.generate_response(text, numeric_id, lang=language_code)
+    return reply
+# -------------------------
+# TTS (generate_voice_reply)
+# -------------------------
+async def tts_fn(user_id: str, text: str, fmt: str):
+    """Generate a voice reply for ``text`` and return what the module returns.
+
+    An empty ``user_id`` falls back to 1 and empty text to ``""``. The result
+    is wired to an audio output, which expects a file path.
+    """
+    numeric_id = int(user_id) if user_id else 1
+    spoken_text = text if text else ""
+    return await mm.generate_voice_reply(spoken_text, user_id=numeric_id, fmt=fmt)
+# -------------------------
+# VOICE -> TEXT (+emotion)
+# -------------------------
+async def voice_fn(user_id: str, audio_path: str | None):
+    """Run voice processing on an audio file, or return a placeholder.
+
+    An empty ``user_id`` falls back to 1. When no audio path is supplied a
+    fixed "no_audio" result dict is returned without calling the module.
+    """
+    numeric_id = int(user_id) if user_id else 1
+    if audio_path:
+        wrapped = _GradioFile(audio_path)
+        return await mm.process_voice_message(wrapped, user_id=numeric_id)
+    # Nothing was recorded or uploaded.
+    return {"text": "", "language": "en", "emotion": "no_audio", "is_speech": False}
+# -------------------------
+# IMAGE: caption
+# -------------------------
+async def img_caption_fn(user_id: str, image_path: str | None):
+    """Caption an image, or return a fixed message when none is supplied.
+
+    An empty ``user_id`` falls back to 1.
+    """
+    numeric_id = int(user_id) if user_id else 1
+    if image_path:
+        wrapped = _GradioFile(image_path)
+        return await mm.process_image_message(wrapped, user_id=numeric_id)
+    return "No image provided."
+# -------------------------
+# IMAGE: text2img
+# -------------------------
+async def img_generate_fn(user_id: str, prompt: str, width: int, height: int, steps: int):
+    """Generate an image from ``prompt`` and return the module's result.
+
+    An empty ``user_id`` falls back to 1 and an empty prompt to ``""``;
+    ``width``, ``height`` and ``steps`` are forwarded unchanged.
+    """
+    numeric_id = int(user_id) if user_id else 1
+    text_prompt = prompt if prompt else ""
+    return await mm.generate_image_from_text(
+        text_prompt,
+        user_id=numeric_id,
+        width=width,
+        height=height,
+        steps=steps,
+    )
+# -------------------------
+# IMAGE: inpaint
+# -------------------------
+async def img_inpaint_fn(user_id: str, image_path: str | None, mask_path: str | None, prompt: str):
+    """Inpaint a base image, optionally with a mask; None if no base image.
+
+    An empty ``user_id`` falls back to 1 and an empty prompt to ``""``. When
+    no mask path is supplied, ``None`` is passed as the mask argument.
+    """
+    numeric_id = int(user_id) if user_id else 1
+    if not image_path:
+        return None
+    base_file = _GradioFile(image_path)
+    mask_file = None
+    if mask_path:
+        mask_file = _GradioFile(mask_path)
+    edited = await mm.edit_image_inpaint(
+        base_file,
+        mask_file,
+        prompt=prompt if prompt else "",
+        user_id=numeric_id,
+    )
+    return edited
+# -------------------------
+# VIDEO: process
+# -------------------------
+async def video_fn(user_id: str, video_path: str | None, max_frames: int):
+    """Process a video file, or return an empty result when none is supplied.
+
+    An empty ``user_id`` falls back to 1; ``max_frames`` is forwarded
+    unchanged to the module.
+    """
+    numeric_id = int(user_id) if user_id else 1
+    if video_path:
+        wrapped = _GradioFile(video_path)
+        return await mm.process_video(wrapped, user_id=numeric_id, max_frames=max_frames)
+    # No video supplied: return an empty placeholder result.
+    return {"duration": 0, "fps": 0, "transcription": "", "captions": []}
+# -------------------------
+# FILE: process (pdf/docx/txt/csv)
+# -------------------------
+async def file_fn(user_id: str, file_path: str | None):
+    """Process an uploaded file, or return an empty summary when none is given.
+
+    An empty ``user_id`` falls back to 1.
+    """
+    numeric_id = int(user_id) if user_id else 1
+    if file_path:
+        wrapped = _GradioFile(file_path)
+        return await mm.process_file(wrapped, user_id=numeric_id)
+    return {"summary": "", "length": 0, "type": ""}
+# -------------------------
+# CODE: complete
+# -------------------------
+async def code_complete_fn(prompt: str, max_tokens: int, temperature: float):
+    """Return the module's code completion for ``prompt`` (``""`` if empty)."""
+    completion_prompt = prompt if prompt else ""
+    completion = await mm.code_complete(
+        completion_prompt,
+        max_tokens=max_tokens,
+        temperature=temperature,
+    )
+    return completion
+# -------------------------
+# CODE: execute (DANGEROUS)
+# -------------------------
+async def code_exec_fn(code: str, timeout: int):
+    """Execute ``code`` through the module and format the outcome as text.
+
+    Returns ``"ERROR: ..."`` when the result contains an ``"error"`` key;
+    otherwise the non-empty stdout/stderr sections joined by a newline, or
+    ``"(no output)"`` when both are empty.
+    """
+    # Treat this as unsafe even if the module applies its own time limit.
+    result = await mm.execute_python_code(code if code else "", timeout=timeout)
+    if "error" in result:
+        return f"ERROR: {result['error']}"
+    stdout_text = result.get("stdout")
+    stderr_text = result.get("stderr")
+    sections = []
+    if stdout_text:
+        sections.append(f"[stdout]\n{stdout_text}")
+    if stderr_text:
+        sections.append(f"[stderr]\n{stderr_text}")
+    formatted = "\n".join(sections).strip()
+    return formatted or "(no output)"
+# UI layout: a shared User ID / language row, then one tab per capability.
+# Each button wires its inputs to the matching async handler.
+with gr.Blocks(title="Multimodal Space") as demo:
+    gr.Markdown("# 🔮 Multimodal Space")
     with gr.Row():
+        user_id = gr.Textbox(label="User ID", value="1", scale=1)
+        lang = gr.Textbox(label="Language code (e.g., en, fr, es)", value="en", scale=1)
+    with gr.Tab("💬 Chat"):
+        msg_in = gr.Textbox(label="Message")
+        msg_out = gr.Textbox(label="Response", interactive=False)
+        gr.Button("Send").click(chat_fn, [user_id, msg_in, lang], msg_out)
+    with gr.Tab("🗣️ Voice → Text (+ Emotion)"):
+        audio_in = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Upload/record voice (ogg/wav/mp3)")
+        voice_json = gr.JSON(label="Result")
+        gr.Button("Transcribe & Analyze").click(voice_fn, [user_id, audio_in], voice_json)
+    with gr.Tab("🔊 TTS"):
+        tts_text = gr.Textbox(label="Text to speak")
+        tts_fmt = gr.Dropdown(choices=["ogg", "wav", "mp3"], value="ogg", label="Format")
+        tts_audio = gr.Audio(label="Generated Audio", interactive=False)
+        gr.Button("Generate Voice Reply").click(tts_fn, [user_id, tts_text, tts_fmt], tts_audio)
+    with gr.Tab("🖼️ Image Caption"):
+        img_in = gr.Image(type="filepath", label="Image")
+        caption_out = gr.Textbox(label="Caption", interactive=False)
+        gr.Button("Caption").click(img_caption_fn, [user_id, img_in], caption_out)
+    with gr.Tab("🎨 Text → Image"):
+        ti_prompt = gr.Textbox(label="Prompt")
+        with gr.Row():
+            ti_w = gr.Slider(256, 768, value=512, step=64, label="Width")
+            ti_h = gr.Slider(256, 768, value=512, step=64, label="Height")
+            ti_steps = gr.Slider(10, 50, value=30, step=1, label="Steps")
+        ti_out = gr.Image(label="Generated Image", interactive=False, type="filepath")
+        gr.Button("Generate").click(img_generate_fn, [user_id, ti_prompt, ti_w, ti_h, ti_steps], ti_out)
+    with gr.Tab("🩹 Inpaint"):
+        base_img = gr.Image(type="filepath", label="Base image")
+        # No `optional=True`: Gradio 4 components reject that keyword, and an
+        # empty image input already reaches the handler as None.
+        mask_img = gr.Image(type="filepath", label="Mask (white = keep, black = edit)")
+        inpaint_prompt = gr.Textbox(label="Prompt")
+        inpaint_out = gr.Image(label="Edited Image", interactive=False, type="filepath")
+        gr.Button("Inpaint").click(img_inpaint_fn, [user_id, base_img, mask_img, inpaint_prompt], inpaint_out)
+    with gr.Tab("🎞️ Video"):
+        vid_in = gr.Video(label="Video file")
+        max_frames = gr.Slider(1, 12, value=4, step=1, label="Max keyframes to sample")
+        vid_json = gr.JSON(label="Result (duration/fps/transcript/captions)")
+        gr.Button("Process Video").click(video_fn, [user_id, vid_in, max_frames], vid_json)
+    with gr.Tab("📄 File"):
+        file_in = gr.File(label="Upload file (pdf/docx/txt/csv)", type="filepath")
+        file_json = gr.JSON(label="Summary")
+        gr.Button("Process File").click(file_fn, [user_id, file_in], file_json)
+    with gr.Tab("👨‍💻 Code"):
+        cc_prompt = gr.Textbox(label="Completion prompt")
+        cc_tokens = gr.Slider(16, 1024, value=256, step=16, label="Max tokens")
+        cc_temp = gr.Slider(0.0, 1.0, value=0.2, step=0.05, label="Temperature")
+        cc_out = gr.Code(label="Completion")
+        gr.Button("Complete").click(code_complete_fn, [cc_prompt, cc_tokens, cc_temp], cc_out)
+        # NOTE(review): the handler treats execution as unsafe; confirm the
+        # "sandboxed" wording in this label matches what the module enforces.
+        ce_code = gr.Code(label="Execute Python (sandboxed, time-limited)")
+        ce_timeout = gr.Slider(1, 10, value=5, step=1, label="Timeout (s)")
+        ce_out = gr.Code(label="Exec output")
+        gr.Button("Run Code").click(code_exec_fn, [ce_code, ce_timeout], ce_out)
+# Make API-callable and Space-visible
+# Gradio 4 rejects queue(concurrency_count=...) with an error; its
+# replacement is default_concurrency_limit, which caps concurrent runs per
+# event listener. The UI above already uses Gradio 4 arguments
+# (gr.Audio(sources=...)), so that is the version targeted here.
+demo.queue(default_concurrency_limit=2, max_size=32).launch(server_name="0.0.0.0")