Princeaka committed on
Commit bb4627c · verified · 1 Parent(s): 052fcb6

Update app.py

Files changed (1)
  1. app.py +217 -108
app.py CHANGED
@@ -1,117 +1,226 @@
 import os
 import shutil
 import asyncio
 import gradio as gr
 from multimodal_module import MultiModalChatModule

-# Initialize module
-mm = MultiModalChatModule()
-
-# Environment configuration (already safe but keep)
-os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-
-os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # Disable GPU
-
-os.environ["IMAGEIO_FFMPEG_EXE"] = "/usr/bin/ffmpeg" # Explicit path
-os.environ["FFMPEG_BINARY"] = "/usr/bin/ffmpeg" # Backup for older versions
-
-# A tiny async-compatible "file-like" wrapper so your multimodal_module methods
-# (which expect objects with an async download_to_drive(...) method) work
-class AsyncPathWrapper:
-    def __init__(self, path: str):
-        self.path = path
-
-    async def download_to_drive(self, dst_path: str):
-        # perform copy synchronously but keep API async
-        try:
-            os.makedirs(os.path.dirname(dst_path), exist_ok=True)
-            shutil.copy(self.path, dst_path)
-        except Exception as e:
-            # raise to allow upper-level error handling
-            raise
-
-# Helper to call async methods from sync Gradio callbacks
-def run_async(fn, *args, **kwargs):
-    return asyncio.run(fn(*args, **kwargs))
-
-# Wrappers that adapt Gradio returned file paths to the module's expected interface
-def _wrap_audio(audio_path):
-    if not audio_path:
-        return None
-    return AsyncPathWrapper(audio_path)
-
-def _wrap_image(image_path):
-    if not image_path:
-        return None
-    return AsyncPathWrapper(image_path)
-
-def _wrap_file(file_path):
-    if not file_path:
-        return None
-    return AsyncPathWrapper(file_path)
-
-# Gradio binding functions
-def process_voice(audio_filepath, user_id):
-    # mm.process_voice_message expects an object with download_to_drive
-    wrapped = _wrap_audio(audio_filepath)
-    return run_async(mm.process_voice_message, wrapped, int(user_id))
-
-def process_image(image_filepath, user_id):
-    wrapped = _wrap_image(image_filepath)
-    return run_async(mm.process_image_message, wrapped, int(user_id))
-
-def chat(text, user_id, lang):
-    return run_async(mm.generate_response, text, int(user_id), lang)
-
-def generate_image(prompt, user_id):
-    return run_async(mm.generate_image_from_text, prompt, int(user_id))
-
-def process_file(file_path, user_id):
-    wrapped = _wrap_file(file_path)
-    return run_async(mm.process_file, wrapped, int(user_id))
-
-with gr.Blocks(title="Multimodal AI Assistant") as app:
-    gr.Markdown("## 🚀 Multimodal AI Assistant (Space-friendly)")
-
-    with gr.Tab("💬 Text Chat"):
         with gr.Row():
-            user_id_txt = gr.Textbox(label="User ID", value="123")
-            lang = gr.Dropdown(["en", "es", "fr", "de"], label="Language", value="en")
-        chat_input = gr.Textbox(label="Your Message")
-        chat_output = gr.Textbox(label="AI Response", interactive=False)
-        chat_btn = gr.Button("Send")
-        chat_btn.click(fn=chat, inputs=[chat_input, user_id_txt, lang], outputs=chat_output)
-
-    with gr.Tab("🎙️ Voice"):
-        voice_input = gr.Audio(source="microphone", type="filepath", label="Speak or upload an audio file")
-        voice_user = gr.Textbox(label="User ID", value="123")
-        voice_output = gr.JSON(label="Analysis Results")
-        voice_btn = gr.Button("Process")
-        voice_btn.click(fn=process_voice, inputs=[voice_input, voice_user], outputs=voice_output)
-
-    with gr.Tab("🖼️ Images"):
-        with gr.Tab("Describe"):
-            img_input = gr.Image(type="filepath", label="Upload an image")
-            img_user = gr.Textbox(label="User ID", value="123")
-            img_output = gr.Textbox(label="Description")
-            img_btn = gr.Button("Describe")
-            img_btn.click(fn=process_image, inputs=[img_input, img_user], outputs=img_output)
-
-        with gr.Tab("Generate"):
-            gen_prompt = gr.Textbox(label="Prompt")
-            gen_user = gr.Textbox(label="User ID", value="123")
-            gen_output = gr.Image(label="Generated Image")
-            gen_btn = gr.Button("Generate")
-            gen_btn.click(fn=generate_image, inputs=[gen_prompt, gen_user], outputs=gen_output)
-
-    with gr.Tab("📄 Files"):
-        file_input = gr.File(file_count="single", label="Upload a document (pdf, txt, docx)")
-        file_user = gr.Textbox(label="User ID", value="123")
-        file_output = gr.JSON(label="File Processing Result")
         file_btn = gr.Button("Process File")
-        file_btn.click(fn=process_file, inputs=[file_input, file_user], outputs=file_output)

-if __name__ == "__main__":
-    # Let Spaces manage server settings. This still works locally.
-    app.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
+# app.py - Gradio wrapper for your existing multimodal_module.py (unchanged)
 import os
 import shutil
 import asyncio
+import json
+from typing import Optional
+
 import gradio as gr
+
+# Import your multimodal module exactly as-is
 from multimodal_module import MultiModalChatModule

+# Instantiate your AI (will lazy-load inside your module)
+AI = MultiModalChatModule()
+
+# ------------------------------------------------------------------
+# Helpers / adaptation layer
+# Your multimodal methods expect an object that has an async
+# `download_to_drive(path)` method. Gradio file upload gives us a
+# local temp file path (with attribute .name). We'll wrap it.
+# ------------------------------------------------------------------
+class GradioFileWrapper:
+    def __init__(self, gr_file):
+        """
+        gr_file: Gradio UploadedFile object or path string
+        - In Gradio, the value passed is a dict/path or a tempfile Path object.
+        """
+        # If gradio passes a dict with "name" or direct path string, handle both.
+        self._path = None
+        if isinstance(gr_file, str):
+            # already a path
+            self._path = gr_file
+        else:
+            # gradio may give a file-like object with .name attribute
+            try:
+                self._path = gr_file.name # typical for gradio
+            except Exception:
+                # fallback: convert dict to path if needed
+                try:
+                    self._path = gr_file["name"]
+                except Exception:
+                    raise ValueError("Unsupported file object from Gradio")
+
+    async def download_to_drive(self, dst_path: str) -> None:
+        # Asynchronous signature to match your module's expectations.
+        # We will copy the local file path to dst_path.
+        # Gradio stores the uploaded file locally, so simple copy works.
+        loop = asyncio.get_event_loop()
+        await loop.run_in_executor(None, shutil.copyfile, self._path, dst_path)
+
+# Small helper to call async functions from sync Gradio callbacks
+def run_async(coro):
+    return asyncio.run(coro)
+
+# ------------------------------------------------------------------
+# Gradio callback wrappers
+# ------------------------------------------------------------------
+
+def text_chat(user_id: Optional[int], text: str, lang: str = "en"):
+    try:
+        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        reply = run_async(AI.generate_response(text, uid, lang))
+        return reply
+    except Exception as e:
+        return f"Error: {e}"
+
+def voice_process(user_id: Optional[int], audio_file):
+    try:
+        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        wrapper = GradioFileWrapper(audio_file)
+        result = run_async(AI.process_voice_message(wrapper, uid))
+        # return a readable text blob with details
+        return json.dumps(result, ensure_ascii=False, indent=2)
+    except Exception as e:
+        return f"Error: {e}"
+
+def generate_voice(user_id: Optional[int], reply_text: str, fmt: str = "ogg"):
+    try:
+        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        path = run_async(AI.generate_voice_reply(reply_text, uid, fmt))
+        # Gradio audio accepts a path
+        return path
+    except Exception as e:
+        return None, f"Error: {e}"
+
+def image_caption(user_id: Optional[int], image_file):
+    try:
+        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        wrapper = GradioFileWrapper(image_file)
+        caption = run_async(AI.process_image_message(wrapper, uid))
+        return caption
+    except Exception as e:
+        return f"Error: {e}"
+
+def generate_image(user_id: Optional[int], prompt: str, width: int = 512, height: int = 512, steps: int = 30):
+    try:
+        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        path = run_async(AI.generate_image_from_text(prompt, uid, width=width, height=height, steps=steps))
+        return path
+    except Exception as e:
+        return f"Error: {e}"
+
+def edit_image(user_id: Optional[int], image_file, mask_file, prompt: str = ""):
+    try:
+        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        img_w = GradioFileWrapper(image_file)
+        mask_w = GradioFileWrapper(mask_file) if mask_file not in (None, "", "None") else None
+        path = run_async(AI.edit_image_inpaint(img_w, mask_w, prompt, uid))
+        return path
+    except Exception as e:
+        return f"Error: {e}"
+
+def process_video(user_id: Optional[int], video_file):
+    try:
+        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        wrapper = GradioFileWrapper(video_file)
+        res = run_async(AI.process_video(wrapper, uid))
+        return json.dumps(res, ensure_ascii=False, indent=2)
+    except Exception as e:
+        return f"Error: {e}"
+
+def process_file(user_id: Optional[int], file_obj):
+    try:
+        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        w = GradioFileWrapper(file_obj)
+        res = run_async(AI.process_file(w, uid))
+        return json.dumps(res, ensure_ascii=False, indent=2)
+    except Exception as e:
+        return f"Error: {e}"
+
+def code_complete(user_id: Optional[int], prompt: str, max_tokens: int = 512):
+    try:
+        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        out = run_async(AI.code_complete(prompt, max_tokens=max_tokens))
+        return out
+    except Exception as e:
+        return f"Error: {e}"
+
+# ------------------------------------------------------------------
+# Gradio UI
+# ------------------------------------------------------------------
+with gr.Blocks(title="Multimodal Bot (uses your multimodal_module.py)") as demo:
+    gr.Markdown("# Multimodal Bot\nThis Space uses the exact `multimodal_module.py` you uploaded. Use the tabs below.")
+    with gr.Tab("Text Chat"):
+        with gr.Row():
+            user_id_txt = gr.Textbox(label="User ID (optional)", placeholder="0")
+            lang_sel = gr.Dropdown(choices=["en","zh","ja","ko","es","fr","de","it"], value="en", label="Language")
+            txt_in = gr.Textbox(label="User text", lines=4)
+            txt_out = gr.Textbox(label="Reply", lines=6)
+        txt_btn = gr.Button("Send")
+        txt_btn.click(fn=text_chat, inputs=[user_id_txt, txt_in, lang_sel], outputs=txt_out)
+
+    with gr.Tab("Voice (transcribe + emotion)"):
+        with gr.Row():
+            user_id_voice = gr.Textbox(label="User ID (optional)", placeholder="0")
+            voice_in = gr.Audio(source="upload", type="filepath", label="Upload voice (.ogg/.wav)")
+            voice_out = gr.Textbox(label="Result JSON")
+        voice_btn = gr.Button("Process Voice")
+        voice_btn.click(fn=voice_process, inputs=[user_id_voice, voice_in], outputs=voice_out)
+
+    with gr.Tab("Voice Reply (TTS)"):
+        with gr.Row():
+            user_id_vr = gr.Textbox(label="User ID (optional)", placeholder="0")
+            vr_text = gr.Textbox(label="Text to convert to voice", lines=4)
+            vr_fmt = gr.Dropdown(choices=["ogg","wav","mp3"], value="ogg", label="Format")
+            vr_audio = gr.Audio(label="Generated Voice")
+        vr_btn = gr.Button("Generate Voice")
+        vr_btn.click(fn=generate_voice, inputs=[user_id_vr, vr_text, vr_fmt], outputs=vr_audio)
+
+    with gr.Tab("Image Caption"):
+        with gr.Row():
+            user_id_img = gr.Textbox(label="User ID (optional)", placeholder="0")
+            img_in = gr.Image(type="filepath", label="Upload Image")
+            img_out = gr.Textbox(label="Caption")
+        img_btn = gr.Button("Caption Image")
+        img_btn.click(fn=image_caption, inputs=[user_id_img, img_in], outputs=img_out)
+
+    with gr.Tab("Image Generate"):
         with gr.Row():
+            user_id_gi = gr.Textbox(label="User ID (optional)", placeholder="0")
+            prompt_in = gr.Textbox(label="Prompt", lines=3)
+            width = gr.Slider(minimum=256, maximum=1024, step=64, value=512, label="Width")
+            height = gr.Slider(minimum=256, maximum=1024, step=64, value=512, label="Height")
+            steps = gr.Slider(minimum=10, maximum=50, step=5, value=30, label="Steps")
+            gen_out = gr.Image(type="filepath", label="Generated image")
+        gen_btn = gr.Button("Generate")
+        gen_btn.click(fn=generate_image, inputs=[user_id_gi, prompt_in, width, height, steps], outputs=gen_out)
+
+    with gr.Tab("Image Edit (Inpaint)"):
+        with gr.Row():
+            user_id_ie = gr.Textbox(label="User ID (optional)", placeholder="0")
+            edit_img = gr.Image(type="filepath", label="Image to edit")
+            edit_mask = gr.Image(type="filepath", label="Mask (white=edit black=keep) (optional)")
+            edit_prompt = gr.Textbox(label="Prompt (what to paint)", lines=2)
+            edit_out = gr.Image(type="filepath", label="Edited image")
+        edit_btn = gr.Button("Edit Image")
+        edit_btn.click(fn=edit_image, inputs=[user_id_ie, edit_img, edit_mask, edit_prompt], outputs=edit_out)
+
+    with gr.Tab("Video"):
+        with gr.Row():
+            user_id_vid = gr.Textbox(label="User ID (optional)", placeholder="0")
+            vid_in = gr.Video(label="Upload video")
+            vid_out = gr.Textbox(label="Result JSON")
+        vid_btn = gr.Button("Process Video")
+        vid_btn.click(fn=process_video, inputs=[user_id_vid, vid_in], outputs=vid_out)
+
+    with gr.Tab("Files (PDF/DOCX/TXT)"):
+        with gr.Row():
+            user_id_file = gr.Textbox(label="User ID (optional)", placeholder="0")
+            file_in = gr.File(label="Upload file")
+            file_out = gr.Textbox(label="Result JSON")
         file_btn = gr.Button("Process File")
+        file_btn.click(fn=process_file, inputs=[user_id_file, file_in], outputs=file_out)
+
+    with gr.Tab("Code (StarCoder)"):
+        with gr.Row():
+            user_id_code = gr.Textbox(label="User ID (optional)", placeholder="0")
+            code_prompt = gr.Textbox(label="Prompt for code generation", lines=6)
+            code_out = gr.Textbox(label="Generated code", lines=12)
+        code_btn = gr.Button("Generate Code")
+        code_btn.click(fn=code_complete, inputs=[user_id_code, code_prompt], outputs=code_out)
+
+    gr.Markdown("----\nYour underlying `multimodal_module.py` is used exactly as uploaded. Be patient on first calls — heavy models load lazily.")

+# Launch
+demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
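
The new adaptation layer rests on one contract: whatever object reaches multimodal_module must expose an async download_to_drive(dst_path) method, and the synchronous Gradio callbacks drive it through asyncio.run (the run_async helper). The sketch below is a minimal, self-contained way to exercise that contract without launching the UI, assuming only the standard library. PathWrapper is an inline stand-in that mirrors GradioFileWrapper.download_to_drive from this commit, and save_upload is a hypothetical consumer standing in for how multimodal_module presumably awaits the method; neither name comes from the repo.

import asyncio
import os
import shutil
import tempfile


class PathWrapper:
    # Inline stand-in mirroring GradioFileWrapper.download_to_drive from this commit.
    def __init__(self, path: str):
        self._path = path

    async def download_to_drive(self, dst_path: str) -> None:
        # Copy in a worker thread so the running event loop is never blocked.
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, shutil.copyfile, self._path, dst_path)


async def save_upload(upload, dst_path: str) -> str:
    # Hypothetical consumer: multimodal_module presumably awaits download_to_drive like this.
    await upload.download_to_drive(dst_path)
    return dst_path


if __name__ == "__main__":
    with tempfile.TemporaryDirectory() as tmp:
        src = os.path.join(tmp, "voice.ogg")
        dst = os.path.join(tmp, "copy.ogg")
        with open(src, "wb") as f:
            f.write(b"fake audio bytes")
        # Same pattern as run_async() in app.py: drive the coroutine from sync code.
        saved = asyncio.run(save_upload(PathWrapper(src), dst))
        assert os.path.getsize(saved) == os.path.getsize(src)
        print("download_to_drive contract OK:", saved)

If a check like this passes, any Gradio value that GradioFileWrapper can resolve to a local path should be safe to hand to the module's async methods.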