Princeaka committed on
Commit 9858a63 · verified · 1 Parent(s): 0174295

Update app.py

Files changed (1)
  1. app.py +55 -92
app.py CHANGED
@@ -6,59 +6,40 @@ import json
 from typing import Optional
 
 import gradio as gr
-
-# Import your multimodal module exactly as-is
 from multimodal_module import MultiModalChatModule
 
-# Instantiate your AI (will lazy-load inside your module)
+# Instantiate AI
 AI = MultiModalChatModule()
 
 # ------------------------------------------------------------------
-# Helpers / adaptation layer
-# Your multimodal methods expect an object that has an async
-# `download_to_drive(path)` method. Gradio file upload gives us a
-# local temp file path (with attribute .name). We'll wrap it.
+# File wrapper to adapt Gradio uploads to your module
 # ------------------------------------------------------------------
 class GradioFileWrapper:
     def __init__(self, gr_file):
-        """
-        gr_file: Gradio UploadedFile object or path string
-        - In Gradio, the value passed is a dict/path or a tempfile Path object.
-        """
-        # If gradio passes a dict with "name" or direct path string, handle both.
-        self._path = None
         if isinstance(gr_file, str):
-            # already a path
             self._path = gr_file
         else:
-            # gradio may give a file-like object with .name attribute
             try:
-                self._path = gr_file.name # typical for gradio
+                self._path = gr_file.name
             except Exception:
-                # fallback: convert dict to path if needed
                 try:
                     self._path = gr_file["name"]
                 except Exception:
                     raise ValueError("Unsupported file object from Gradio")
 
     async def download_to_drive(self, dst_path: str) -> None:
-        # Asynchronous signature to match your module's expectations.
-        # We will copy the local file path to dst_path.
-        # Gradio stores the uploaded file locally, so simple copy works.
         loop = asyncio.get_event_loop()
         await loop.run_in_executor(None, shutil.copyfile, self._path, dst_path)
 
-# Small helper to call async functions from sync Gradio callbacks
 def run_async(coro):
     return asyncio.run(coro)
 
 # ------------------------------------------------------------------
-# Gradio callback wrappers
+# Callback functions
 # ------------------------------------------------------------------
-
 def text_chat(user_id: Optional[int], text: str, lang: str = "en"):
     try:
-        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        uid = int(user_id) if user_id else 0
         reply = run_async(AI.generate_response(text, uid, lang))
         return reply
     except Exception as e:
@@ -66,26 +47,24 @@ def text_chat(user_id: Optional[int], text: str, lang: str = "en"):
 
 def voice_process(user_id: Optional[int], audio_file):
     try:
-        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        uid = int(user_id) if user_id else 0
         wrapper = GradioFileWrapper(audio_file)
         result = run_async(AI.process_voice_message(wrapper, uid))
-        # return a readable text blob with details
         return json.dumps(result, ensure_ascii=False, indent=2)
     except Exception as e:
         return f"Error: {e}"
 
 def generate_voice(user_id: Optional[int], reply_text: str, fmt: str = "ogg"):
     try:
-        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        uid = int(user_id) if user_id else 0
         path = run_async(AI.generate_voice_reply(reply_text, uid, fmt))
-        # Gradio audio accepts a path
         return path
     except Exception as e:
         return None, f"Error: {e}"
 
 def image_caption(user_id: Optional[int], image_file):
     try:
-        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        uid = int(user_id) if user_id else 0
         wrapper = GradioFileWrapper(image_file)
         caption = run_async(AI.process_image_message(wrapper, uid))
         return caption
@@ -94,7 +73,7 @@ def image_caption(user_id: Optional[int], image_file):
 
 def generate_image(user_id: Optional[int], prompt: str, width: int = 512, height: int = 512, steps: int = 30):
     try:
-        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        uid = int(user_id) if user_id else 0
         path = run_async(AI.generate_image_from_text(prompt, uid, width=width, height=height, steps=steps))
         return path
     except Exception as e:
@@ -102,9 +81,9 @@ def generate_image(user_id: Optional[int], prompt: str, width: int = 512, height
 
 def edit_image(user_id: Optional[int], image_file, mask_file, prompt: str = ""):
     try:
-        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        uid = int(user_id) if user_id else 0
         img_w = GradioFileWrapper(image_file)
-        mask_w = GradioFileWrapper(mask_file) if mask_file not in (None, "", "None") else None
+        mask_w = GradioFileWrapper(mask_file) if mask_file else None
         path = run_async(AI.edit_image_inpaint(img_w, mask_w, prompt, uid))
         return path
     except Exception as e:
@@ -112,7 +91,7 @@ def edit_image(user_id: Optional[int], image_file, mask_file, prompt: str = ""):
 
 def process_video(user_id: Optional[int], video_file):
     try:
-        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        uid = int(user_id) if user_id else 0
         wrapper = GradioFileWrapper(video_file)
         res = run_async(AI.process_video(wrapper, uid))
         return json.dumps(res, ensure_ascii=False, indent=2)
@@ -121,7 +100,7 @@ def process_video(user_id: Optional[int], video_file):
 
 def process_file(user_id: Optional[int], file_obj):
     try:
-        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        uid = int(user_id) if user_id else 0
         w = GradioFileWrapper(file_obj)
         res = run_async(AI.process_file(w, uid))
         return json.dumps(res, ensure_ascii=False, indent=2)
@@ -130,7 +109,7 @@ def process_file(user_id: Optional[int], file_obj):
 
 def code_complete(user_id: Optional[int], prompt: str, max_tokens: int = 512):
     try:
-        uid = int(user_id) if user_id not in (None, "", "None") else 0
+        uid = int(user_id) if user_id else 0
         out = run_async(AI.code_complete(prompt, max_tokens=max_tokens))
         return out
     except Exception as e:
@@ -139,88 +118,72 @@ def code_complete(user_id: Optional[int], prompt: str, max_tokens: int = 512):
 # ------------------------------------------------------------------
 # Gradio UI
 # ------------------------------------------------------------------
-with gr.Blocks(title="Multimodal Bot (uses your multimodal_module.py)") as demo:
-    gr.Markdown("# Multimodal Bot\nThis Space uses the exact `multimodal_module.py` you uploaded. Use the tabs below.")
-    with gr.Tab("Text Chat"):
+with gr.Blocks(title="Multimodal Bot (Gradio)") as demo:
+    gr.Markdown("# 🧠 Multimodal Bot\nInteract via text, voice, images, video, or files.")
+
+    with gr.Tab("💬 Text Chat"):
         with gr.Row():
            user_id_txt = gr.Textbox(label="User ID (optional)", placeholder="0")
            lang_sel = gr.Dropdown(choices=["en","zh","ja","ko","es","fr","de","it"], value="en", label="Language")
-        txt_in = gr.Textbox(label="User text", lines=4)
-        txt_out = gr.Textbox(label="Reply", lines=6)
-        txt_btn = gr.Button("Send")
-        txt_btn.click(fn=text_chat, inputs=[user_id_txt, txt_in, lang_sel], outputs=txt_out)
+        txt_in = gr.Textbox(label="Your message", lines=4)
+        txt_out = gr.Textbox(label="Bot reply", lines=6)
+        gr.Button("Send").click(text_chat, [user_id_txt, txt_in, lang_sel], txt_out)
 
-    with gr.Tab("Voice (transcribe + emotion)"):
-        with gr.Row():
-            user_id_voice = gr.Textbox(label="User ID (optional)", placeholder="0")
-        voice_in = gr.Audio(source="upload", type="filepath", label="Upload voice (.ogg/.wav)")
+    with gr.Tab("🎤 Voice (Transcribe + Emotion)"):
+        user_id_voice = gr.Textbox(label="User ID (optional)", placeholder="0")
+        voice_in = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Record or upload voice (.ogg/.wav)")
        voice_out = gr.Textbox(label="Result JSON")
-        voice_btn = gr.Button("Process Voice")
-        voice_btn.click(fn=voice_process, inputs=[user_id_voice, voice_in], outputs=voice_out)
+        gr.Button("Process Voice").click(voice_process, [user_id_voice, voice_in], voice_out)
 
-    with gr.Tab("Voice Reply (TTS)"):
-        with gr.Row():
-            user_id_vr = gr.Textbox(label="User ID (optional)", placeholder="0")
-        vr_text = gr.Textbox(label="Text to convert to voice", lines=4)
+    with gr.Tab("🔊 Voice Reply (TTS)"):
+        user_id_vr = gr.Textbox(label="User ID (optional)", placeholder="0")
+        vr_text = gr.Textbox(label="Text to speak", lines=4)
        vr_fmt = gr.Dropdown(choices=["ogg","wav","mp3"], value="ogg", label="Format")
        vr_audio = gr.Audio(label="Generated Voice")
-        vr_btn = gr.Button("Generate Voice")
-        vr_btn.click(fn=generate_voice, inputs=[user_id_vr, vr_text, vr_fmt], outputs=vr_audio)
+        gr.Button("Generate Voice").click(generate_voice, [user_id_vr, vr_text, vr_fmt], vr_audio)
 
-    with gr.Tab("Image Caption"):
-        with gr.Row():
-            user_id_img = gr.Textbox(label="User ID (optional)", placeholder="0")
+    with gr.Tab("🖼️ Image Caption"):
+        user_id_img = gr.Textbox(label="User ID (optional)", placeholder="0")
        img_in = gr.Image(type="filepath", label="Upload Image")
        img_out = gr.Textbox(label="Caption")
-        img_btn = gr.Button("Caption Image")
-        img_btn.click(fn=image_caption, inputs=[user_id_img, img_in], outputs=img_out)
+        gr.Button("Caption Image").click(image_caption, [user_id_img, img_in], img_out)
 
-    with gr.Tab("Image Generate"):
-        with gr.Row():
-            user_id_gi = gr.Textbox(label="User ID (optional)", placeholder="0")
+    with gr.Tab("🎨 Image Generate"):
+        user_id_gi = gr.Textbox(label="User ID (optional)", placeholder="0")
        prompt_in = gr.Textbox(label="Prompt", lines=3)
-        width = gr.Slider(minimum=256, maximum=1024, step=64, value=512, label="Width")
-        height = gr.Slider(minimum=256, maximum=1024, step=64, value=512, label="Height")
-        steps = gr.Slider(minimum=10, maximum=50, step=5, value=30, label="Steps")
+        width = gr.Slider(256, 1024, 512, step=64, label="Width")
+        height = gr.Slider(256, 1024, 512, step=64, label="Height")
+        steps = gr.Slider(10, 50, 30, step=5, label="Steps")
        gen_out = gr.Image(type="filepath", label="Generated image")
-        gen_btn = gr.Button("Generate")
-        gen_btn.click(fn=generate_image, inputs=[user_id_gi, prompt_in, width, height, steps], outputs=gen_out)
+        gr.Button("Generate").click(generate_image, [user_id_gi, prompt_in, width, height, steps], gen_out)
 
-    with gr.Tab("Image Edit (Inpaint)"):
-        with gr.Row():
-            user_id_ie = gr.Textbox(label="User ID (optional)", placeholder="0")
+    with gr.Tab("✏️ Image Edit (Inpaint)"):
+        user_id_ie = gr.Textbox(label="User ID (optional)", placeholder="0")
        edit_img = gr.Image(type="filepath", label="Image to edit")
-        edit_mask = gr.Image(type="filepath", label="Mask (white=edit black=keep) (optional)")
-        edit_prompt = gr.Textbox(label="Prompt (what to paint)", lines=2)
+        edit_mask = gr.Image(type="filepath", label="Mask (optional)")
+        edit_prompt = gr.Textbox(label="Prompt", lines=2)
        edit_out = gr.Image(type="filepath", label="Edited image")
-        edit_btn = gr.Button("Edit Image")
-        edit_btn.click(fn=edit_image, inputs=[user_id_ie, edit_img, edit_mask, edit_prompt], outputs=edit_out)
+        gr.Button("Edit Image").click(edit_image, [user_id_ie, edit_img, edit_mask, edit_prompt], edit_out)
 
-    with gr.Tab("Video"):
-        with gr.Row():
-            user_id_vid = gr.Textbox(label="User ID (optional)", placeholder="0")
+    with gr.Tab("🎥 Video"):
+        user_id_vid = gr.Textbox(label="User ID (optional)", placeholder="0")
        vid_in = gr.Video(label="Upload video")
        vid_out = gr.Textbox(label="Result JSON")
-        vid_btn = gr.Button("Process Video")
-        vid_btn.click(fn=process_video, inputs=[user_id_vid, vid_in], outputs=vid_out)
+        gr.Button("Process Video").click(process_video, [user_id_vid, vid_in], vid_out)
 
-    with gr.Tab("Files (PDF/DOCX/TXT)"):
-        with gr.Row():
-            user_id_file = gr.Textbox(label="User ID (optional)", placeholder="0")
+    with gr.Tab("📄 Files (PDF/DOCX/TXT)"):
+        user_id_file = gr.Textbox(label="User ID (optional)", placeholder="0")
        file_in = gr.File(label="Upload file")
        file_out = gr.Textbox(label="Result JSON")
-        file_btn = gr.Button("Process File")
-        file_btn.click(fn=process_file, inputs=[user_id_file, file_in], outputs=file_out)
+        gr.Button("Process File").click(process_file, [user_id_file, file_in], file_out)
 
-    with gr.Tab("Code (StarCoder)"):
-        with gr.Row():
-            user_id_code = gr.Textbox(label="User ID (optional)", placeholder="0")
-        code_prompt = gr.Textbox(label="Prompt for code generation", lines=6)
+    with gr.Tab("💻 Code Generation"):
+        user_id_code = gr.Textbox(label="User ID (optional)", placeholder="0")
+        code_prompt = gr.Textbox(label="Code prompt", lines=6)
        code_out = gr.Textbox(label="Generated code", lines=12)
-        code_btn = gr.Button("Generate Code")
-        code_btn.click(fn=code_complete, inputs=[user_id_code, code_prompt], outputs=code_out)
+        gr.Button("Generate Code").click(code_complete, [user_id_code, code_prompt], code_out)
 
-    gr.Markdown("----\nYour underlying `multimodal_module.py` is used exactly as uploaded. Be patient on first calls — heavy models load lazily.")
+    gr.Markdown("----\nThis Space runs your exact `multimodal_module.py`. First requests may take longer due to model loading.")
 
-# Launch
+# Launch app
 demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
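
A quick aside on the change repeated in every callback above: the user-id check is now "uid = int(user_id) if user_id else 0". The sketch below is not part of the commit; the parse_uid helper is hypothetical, since app.py inlines the expression inside each callback's try/except. It shows how the simplified check behaves:

def parse_uid(user_id):
    # Any falsy value (None, empty textbox, 0) maps to user id 0.
    return int(user_id) if user_id else 0

parse_uid(None)    # 0
parse_uid("")      # 0
parse_uid("42")    # 42
# parse_uid("None") now raises ValueError, which each callback's try/except
# surfaces as an "Error: ..." reply; the old check against (None, "", "None")
# silently mapped the literal string "None" to 0.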
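
For completeness, a standalone sketch of the copy pattern that GradioFileWrapper.download_to_drive keeps unchanged in this commit; the file paths are purely illustrative:

import asyncio
import shutil

async def copy_async(src: str, dst: str) -> None:
    # Same approach as download_to_drive: run the blocking shutil.copyfile
    # in the default thread-pool executor so the running event loop stays free.
    loop = asyncio.get_event_loop()
    await loop.run_in_executor(None, shutil.copyfile, src, dst)

# asyncio.run(copy_async("/tmp/gradio/upload.wav", "/tmp/voice_input.wav"))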