Princeaka committed on
Commit
d6333ba
·
verified ·
1 Parent(s): 06d1612

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -150
app.py CHANGED
@@ -1,189 +1,133 @@
1
- # app.py - Gradio wrapper for your existing multimodal_module.py (unchanged)
2
  import os
3
  import shutil
4
  import asyncio
5
- import json
6
  from typing import Optional
7
 
8
  import gradio as gr
 
 
 
 
9
  from multimodal_module import MultiModalChatModule
10
 
11
- # Instantiate AI
12
  AI = MultiModalChatModule()
13
 
14
- # ------------------------------------------------------------------
15
- # File wrapper to adapt Gradio uploads to your module
16
- # ------------------------------------------------------------------
17
  class GradioFileWrapper:
18
- def __init__(self, gr_file):
19
- if isinstance(gr_file, str):
20
- self._path = gr_file
21
- else:
22
- try:
23
- self._path = gr_file.name
24
- except Exception:
25
- try:
26
- self._path = gr_file["name"]
27
- except Exception:
28
- raise ValueError("Unsupported file object from Gradio")
29
-
30
- async def download_to_drive(self, dst_path: str) -> None:
31
  loop = asyncio.get_event_loop()
32
  await loop.run_in_executor(None, shutil.copyfile, self._path, dst_path)
33
 
34
  def run_async(coro):
35
  return asyncio.run(coro)
36
 
37
- # ------------------------------------------------------------------
38
- # Callback functions
39
- # ------------------------------------------------------------------
40
- def text_chat(user_id: Optional[int], text: str, lang: str = "en"):
41
- try:
42
- uid = int(user_id) if user_id else 0
43
- reply = run_async(AI.generate_response(text, uid, lang))
44
- return reply
45
- except Exception as e:
46
- return f"Error: {e}"
47
-
48
- def voice_process(user_id: Optional[int], audio_file):
 
 
 
 
 
 
 
 
 
49
  try:
50
- uid = int(user_id) if user_id else 0
51
- wrapper = GradioFileWrapper(audio_file)
52
- result = run_async(AI.process_voice_message(wrapper, uid))
53
- return json.dumps(result, ensure_ascii=False, indent=2)
54
  except Exception as e:
55
- return f"Error: {e}"
56
 
57
- def generate_voice(user_id: Optional[int], reply_text: str, fmt: str = "ogg"):
 
58
  try:
59
- uid = int(user_id) if user_id else 0
60
- path = run_async(AI.generate_voice_reply(reply_text, uid, fmt))
61
- return path
 
 
 
62
  except Exception as e:
63
- return None, f"Error: {e}"
64
 
65
- def image_caption(user_id: Optional[int], image_file):
 
66
  try:
67
- uid = int(user_id) if user_id else 0
68
- wrapper = GradioFileWrapper(image_file)
69
- caption = run_async(AI.process_image_message(wrapper, uid))
70
- return caption
 
 
71
  except Exception as e:
72
- return f"Error: {e}"
73
 
74
- def generate_image(user_id: Optional[int], prompt: str, width: int = 512, height: int = 512, steps: int = 30):
 
75
  try:
76
- uid = int(user_id) if user_id else 0
77
- path = run_async(AI.generate_image_from_text(prompt, uid, width=width, height=height, steps=steps))
78
- return path
 
 
 
79
  except Exception as e:
80
- return f"Error: {e}"
81
 
82
- def edit_image(user_id: Optional[int], image_file, mask_file, prompt: str = ""):
 
83
  try:
84
- uid = int(user_id) if user_id else 0
85
- img_w = GradioFileWrapper(image_file)
86
- mask_w = GradioFileWrapper(mask_file) if mask_file else None
87
- path = run_async(AI.edit_image_inpaint(img_w, mask_w, prompt, uid))
88
- return path
 
89
  except Exception as e:
90
- return f"Error: {e}"
91
 
92
- def process_video(user_id: Optional[int], video_file):
93
- try:
94
- uid = int(user_id) if user_id else 0
95
- wrapper = GradioFileWrapper(video_file)
96
- res = run_async(AI.process_video(wrapper, uid))
97
- return json.dumps(res, ensure_ascii=False, indent=2)
98
- except Exception as e:
99
- return f"Error: {e}"
100
-
101
- def process_file(user_id: Optional[int], file_obj):
102
- try:
103
- uid = int(user_id) if user_id else 0
104
- w = GradioFileWrapper(file_obj)
105
- res = run_async(AI.process_file(w, uid))
106
- return json.dumps(res, ensure_ascii=False, indent=2)
107
- except Exception as e:
108
- return f"Error: {e}"
109
-
110
- def code_complete(user_id: Optional[int], prompt: str, max_tokens: int = 512):
111
- try:
112
- uid = int(user_id) if user_id else 0
113
- out = run_async(AI.code_complete(prompt, max_tokens=max_tokens))
114
- return out
115
- except Exception as e:
116
- return f"Error: {e}"
117
-
118
- # ------------------------------------------------------------------
119
  # Gradio UI
120
- # ------------------------------------------------------------------
121
- with gr.Blocks(title="Multimodal Bot (Gradio)") as demo:
122
  gr.Markdown("# 🧠 Multimodal Bot\nInteract via text, voice, images, video, or files.")
123
 
124
  with gr.Tab("💬 Text Chat"):
125
- with gr.Row():
126
- user_id_txt = gr.Textbox(label="User ID (optional)", placeholder="0")
127
- lang_sel = gr.Dropdown(choices=["en","zh","ja","ko","es","fr","de","it"], value="en", label="Language")
128
  txt_in = gr.Textbox(label="Your message", lines=4)
129
  txt_out = gr.Textbox(label="Bot reply", lines=6)
130
- gr.Button("Send").click(text_chat, [user_id_txt, txt_in, lang_sel], txt_out)
131
-
132
- with gr.Tab("🎤 Voice (Transcribe + Emotion)"):
133
- user_id_voice = gr.Textbox(label="User ID (optional)", placeholder="0")
134
- voice_in = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Record or upload voice (.ogg/.wav)")
135
- voice_out = gr.Textbox(label="Result JSON")
136
- gr.Button("Process Voice").click(voice_process, [user_id_voice, voice_in], voice_out)
137
-
138
- with gr.Tab("🔊 Voice Reply (TTS)"):
139
- user_id_vr = gr.Textbox(label="User ID (optional)", placeholder="0")
140
- vr_text = gr.Textbox(label="Text to speak", lines=4)
141
- vr_fmt = gr.Dropdown(choices=["ogg","wav","mp3"], value="ogg", label="Format")
142
- vr_audio = gr.Audio(label="Generated Voice")
143
- gr.Button("Generate Voice").click(generate_voice, [user_id_vr, vr_text, vr_fmt], vr_audio)
144
-
145
- with gr.Tab("🖼️ Image Caption"):
146
- user_id_img = gr.Textbox(label="User ID (optional)", placeholder="0")
147
- img_in = gr.Image(type="filepath", label="Upload Image")
148
  img_out = gr.Textbox(label="Caption")
149
- gr.Button("Caption Image").click(image_caption, [user_id_img, img_in], img_out)
150
-
151
- with gr.Tab("🎨 Image Generate"):
152
- user_id_gi = gr.Textbox(label="User ID (optional)", placeholder="0")
153
- prompt_in = gr.Textbox(label="Prompt", lines=3)
154
- width = gr.Slider(256, 1024, 512, step=64, label="Width")
155
- height = gr.Slider(256, 1024, 512, step=64, label="Height")
156
- steps = gr.Slider(10, 50, 30, step=5, label="Steps")
157
- gen_out = gr.Image(type="filepath", label="Generated image")
158
- gr.Button("Generate").click(generate_image, [user_id_gi, prompt_in, width, height, steps], gen_out)
159
-
160
- with gr.Tab("✏️ Image Edit (Inpaint)"):
161
- user_id_ie = gr.Textbox(label="User ID (optional)", placeholder="0")
162
- edit_img = gr.Image(type="filepath", label="Image to edit")
163
- edit_mask = gr.Image(type="filepath", label="Mask (optional)")
164
- edit_prompt = gr.Textbox(label="Prompt", lines=2)
165
- edit_out = gr.Image(type="filepath", label="Edited image")
166
- gr.Button("Edit Image").click(edit_image, [user_id_ie, edit_img, edit_mask, edit_prompt], edit_out)
167
-
168
- with gr.Tab("🎥 Video"):
169
- user_id_vid = gr.Textbox(label="User ID (optional)", placeholder="0")
170
- vid_in = gr.Video(label="Upload video")
171
- vid_out = gr.Textbox(label="Result JSON")
172
- gr.Button("Process Video").click(process_video, [user_id_vid, vid_in], vid_out)
173
-
174
- with gr.Tab("📄 Files (PDF/DOCX/TXT)"):
175
- user_id_file = gr.Textbox(label="User ID (optional)", placeholder="0")
176
- file_in = gr.File(label="Upload file")
177
- file_out = gr.Textbox(label="Result JSON")
178
- gr.Button("Process File").click(process_file, [user_id_file, file_in], file_out)
179
-
180
- with gr.Tab("💻 Code Generation"):
181
- user_id_code = gr.Textbox(label="User ID (optional)", placeholder="0")
182
- code_prompt = gr.Textbox(label="Code prompt", lines=6)
183
- code_out = gr.Textbox(label="Generated code", lines=12)
184
- gr.Button("Generate Code").click(code_complete, [user_id_code, code_prompt], code_out)
185
-
186
- gr.Markdown("----\nThis Space runs your exact `multimodal_module.py`. First requests may take longer due to model loading.")
187
-
188
- # Launch app
189
- demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
 
 
1
  import os
2
  import shutil
3
  import asyncio
 
4
  from typing import Optional
5
 
6
  import gradio as gr
7
+ from fastapi import FastAPI, UploadFile, Form
8
+ import uvicorn
9
+ import socket
10
+
11
  from multimodal_module import MultiModalChatModule
12
 
13
+ # Initialize AI module
14
  AI = MultiModalChatModule()
15
 
16
# ---------------------------
# Utility
# ---------------------------
class GradioFileWrapper:
    """Expose a file on local disk through an awaitable ``download_to_drive``.

    Instances are passed to the ``AI.process_*`` coroutines in place of a
    raw path.
    """

    def __init__(self, file_path):
        """Remember where the source file lives.

        Args:
            file_path: Filesystem path (``str`` or ``os.PathLike``). An
                object exposing the path as ``.name``, or a mapping with a
                ``"path"`` or ``"name"`` entry, is accepted as well.

        Raises:
            ValueError: If no path can be derived from ``file_path``
                (for example when it is ``None``).
        """
        self._path = self._resolve_path(file_path)

    @staticmethod
    def _resolve_path(file_obj):
        """Return the filesystem path carried by ``file_obj``."""
        if isinstance(file_obj, (str, os.PathLike)):
            return os.fspath(file_obj)
        if isinstance(file_obj, dict):
            candidate = file_obj.get("path") or file_obj.get("name")
        else:
            candidate = getattr(file_obj, "name", None)
        if not candidate:
            # Fail here with a clear message instead of letting
            # shutil.copyfile raise a TypeError later inside the executor.
            raise ValueError("Unsupported file object from Gradio")
        return os.fspath(candidate)

    async def download_to_drive(self, dst_path: str):
        """Copy the wrapped file to ``dst_path``.

        The copy runs in the loop's default executor so the blocking disk
        I/O does not stall the event loop.
        """
        # Inside a coroutine a loop is always running; get_running_loop()
        # is the supported way to obtain it.
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, shutil.copyfile, self._path, dst_path)


def run_async(coro):
    """Run ``coro`` to completion from synchronous code and return its result.

    When the calling thread has no running event loop this is simply
    ``asyncio.run(coro)``. When a loop is already running in this thread,
    ``asyncio.run`` would raise ``RuntimeError``, so the coroutine is run on
    a fresh loop in a short-lived worker thread instead. The caller blocks
    until the result is available either way.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop in this thread: the plain path.
        return asyncio.run(coro)

    import concurrent.futures  # only needed on the fallback path

    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, coro).result()
29
 
30
def get_free_port(default=7860, host="0.0.0.0"):
    """Return ``default`` if it can be bound on ``host``, else a free port.

    Args:
        default: Preferred TCP port. ``0`` asks the OS to pick one.
        host: Address to probe; defaults to all IPv4 interfaces.

    Returns:
        The port number that was successfully bound during the probe.

    Note:
        The probe socket is closed before returning, so another process can
        still claim the port before the server binds it.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        try:
            probe.bind((host, default))
        except OSError:
            # Preferred port unavailable: let the OS choose.
            probe.bind((host, 0))
        # Ask the socket rather than echoing ``default`` so that
        # ``default=0`` yields the real OS-assigned port, not 0.
        return probe.getsockname()[1]
39
+
40
+ # ---------------------------
41
+ # FastAPI API for external apps
42
+ # ---------------------------
43
api = FastAPI()


@api.post("/api/text_chat")
async def api_text_chat(
    user_id: Optional[int] = Form(0),
    text: str = Form(...),
    lang: str = Form("en"),
):
    """Answer a text message via ``AI.generate_response``.

    Form fields:
        user_id: Optional numeric user ID; missing or ``None`` is treated as 0.
        text: The user's message (required).
        lang: Language code, ``"en"`` by default.

    Returns:
        ``{"reply": ...}`` on success, or ``{"error": "<message>"}`` if the
        AI call raises.
    """
    try:
        # ``user_id`` is declared Optional, so fold None into the default
        # instead of letting int(None) raise a TypeError.
        reply = await AI.generate_response(text, int(user_id or 0), lang)
    except Exception as e:
        return {"error": str(e)}
    return {"reply": reply}
56
 
57
async def _handle_upload(upload, user_id, handler_names, result_key):
    """Spool an upload to a private temp file and pass it to an AI handler.

    Args:
        upload: The ``UploadFile`` from the request, or ``None`` if the
            client sent no file.
        user_id: Numeric user ID from the form; ``None`` is treated as 0.
        handler_names: Names of ``AI`` coroutine methods to try, in order;
            the first one that exists is called.
        result_key: Key under which the handler's result is returned.

    Returns:
        ``{result_key: result}`` on success, ``{"error": "<message>"}``
        otherwise.
    """
    import tempfile  # local import: not part of the file's import block

    if upload is None:
        return {"error": "No file uploaded"}

    temp_path = None
    try:
        # Never build the path from the client-supplied filename: it may
        # contain "../" components and collides across concurrent requests.
        # Only its extension is reused, on a name chosen by mkstemp.
        suffix = os.path.splitext(os.path.basename(upload.filename or ""))[1]
        fd, temp_path = tempfile.mkstemp(suffix=suffix)
        with os.fdopen(fd, "wb") as f:
            f.write(await upload.read())

        handler = next(
            (getattr(AI, name) for name in handler_names if hasattr(AI, name)),
            None,
        )
        if handler is None:
            raise AttributeError(
                f"AI module provides none of: {', '.join(handler_names)}"
            )
        result = await handler(GradioFileWrapper(temp_path), int(user_id or 0))
        return {result_key: result}
    except Exception as e:
        return {"error": str(e)}
    finally:
        # The handler has returned by now, so the spooled copy is no longer
        # needed. NOTE(review): assumes the AI module does not keep reading
        # the source path after its coroutine completes; confirm.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                pass


@api.post("/api/image_caption")
async def api_image_caption(user_id: Optional[int] = Form(0), image: UploadFile = None):
    """Caption an uploaded image; returns ``{"caption": ...}`` or ``{"error": ...}``."""
    return await _handle_upload(image, user_id, ("process_image_message",), "caption")


@api.post("/api/voice_process")
async def api_voice_process(user_id: Optional[int] = Form(0), audio: UploadFile = None):
    """Process an uploaded voice clip; returns ``{"reply": ...}`` or ``{"error": ...}``."""
    return await _handle_upload(audio, user_id, ("process_voice_message",), "reply")


@api.post("/api/video_process")
async def api_video_process(user_id: Optional[int] = Form(0), video: UploadFile = None):
    """Process an uploaded video; returns ``{"reply": ...}`` or ``{"error": ...}``."""
    # NOTE(review): the previous revision of this file called
    # ``AI.process_video``; that name is kept as a fallback until the
    # method actually exposed by multimodal_module is confirmed.
    return await _handle_upload(
        video, user_id, ("process_video_message", "process_video"), "reply"
    )


@api.post("/api/file_process")
async def api_file_process(user_id: Optional[int] = Form(0), file: UploadFile = None):
    """Process an uploaded document; returns ``{"reply": ...}`` or ``{"error": ...}``."""
    # NOTE(review): the previous revision of this file called
    # ``AI.process_file``; kept as a fallback for the same reason as above.
    return await _handle_upload(
        file, user_id, ("process_file_message", "process_file"), "reply"
    )
104
 
105
+ # ---------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  # Gradio UI
107
+ # ---------------------------
108
with gr.Blocks(title="Multimodal Bot") as demo:
    gr.Markdown("# 🧠 Multimodal Bot\nInteract via text, voice, images, video, or files.")

    def _chat(uid, txt, lang):
        """Text-chat callback: return the bot reply, or an error message."""
        try:
            # An empty User ID box means user 0.
            return run_async(AI.generate_response(txt, int(uid or 0), lang))
        except Exception as e:
            # Show the problem (e.g. a non-numeric user ID) in the output
            # box rather than raising an unhandled error from the callback.
            return f"Error: {e}"

    def _caption(uid, img):
        """Caption callback: return the caption, or an error message."""
        if not img:
            # Nothing uploaded yet; there is no file path to wrap.
            return "Error: please upload an image first."
        try:
            return run_async(
                AI.process_image_message(GradioFileWrapper(img), int(uid or 0))
            )
        except Exception as e:
            return f"Error: {e}"

    with gr.Tab("💬 Text Chat"):
        user_id_txt = gr.Textbox(label="User ID", placeholder="0")
        lang_sel = gr.Dropdown(
            choices=["en", "zh", "ja", "ko", "es", "fr", "de", "it"],
            value="en",
            label="Language",
        )
        txt_in = gr.Textbox(label="Your message", lines=4)
        txt_out = gr.Textbox(label="Bot reply", lines=6)
        gr.Button("Send").click(_chat, [user_id_txt, txt_in, lang_sel], txt_out)

    with gr.Tab("🖼 Image Captioning"):
        user_id_img = gr.Textbox(label="User ID", placeholder="0")
        # type="filepath" makes Gradio hand the callback a path string.
        img_in = gr.Image(type="filepath", label="Upload an image")
        img_out = gr.Textbox(label="Caption")
        gr.Button("Caption").click(_caption, [user_id_img, img_in], img_out)
125
+
126
+ # ---------------------------
127
+ # Mount Gradio UI to FastAPI
128
+ # ---------------------------
129
# The /api/* routes were registered on ``api`` before this call; the Gradio
# UI is served from the root path of the same application.
api = gr.mount_gradio_app(api, demo, path="/")

if __name__ == "__main__":
    # Honour a PORT environment variable, falling back to 7860 when unset.
    preferred_port = int(os.environ.get("PORT", 7860))
    port = get_free_port(preferred_port)
    if port != preferred_port:
        # Make the fallback visible; otherwise the server quietly ends up
        # on a port nobody is pointing at.
        print(f"Port {preferred_port} is busy; serving on port {port} instead.")
    uvicorn.run(api, host="0.0.0.0", port=port)