Princeaka committed on
Commit
1afccba
Β·
verified Β·
1 Parent(s): 72ec8f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -34
app.py CHANGED
@@ -1,74 +1,112 @@
1
  import os
 
 
2
  import gradio as gr
3
  from multimodal_module import MultiModalChatModule
4
- import asyncio
5
 
6
  # Initialize module
7
  mm = MultiModalChatModule()
8
 
9
- # Environment configuration
10
  os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
11
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
12
 
13
- async def async_wrapper(fn, *args):
14
- """Handle async calls from Gradio"""
15
- return await fn(*args)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
- def process_voice(audio, user_id):
18
- return asyncio.run(async_wrapper(mm.process_voice_message, audio, int(user_id)))
 
 
 
19
 
20
- def process_image(image, user_id):
21
- return asyncio.run(async_wrapper(mm.process_image_message, image, int(user_id)))
 
22
 
23
  def chat(text, user_id, lang):
24
- return asyncio.run(async_wrapper(mm.generate_response, text, int(user_id), lang))
25
 
26
  def generate_image(prompt, user_id):
27
- return asyncio.run(async_wrapper(
28
- mm.generate_image_from_text,
29
- prompt,
30
- int(user_id)
31
- ))
32
 
33
  with gr.Blocks(title="Multimodal AI Assistant") as app:
34
- gr.Markdown("## πŸš€ Multimodal AI Assistant")
35
-
36
  with gr.Tab("πŸ’¬ Text Chat"):
37
  with gr.Row():
38
- user_id = gr.Textbox(label="User ID", value="123")
39
  lang = gr.Dropdown(["en", "es", "fr", "de"], label="Language", value="en")
40
  chat_input = gr.Textbox(label="Your Message")
41
  chat_output = gr.Textbox(label="AI Response", interactive=False)
42
  chat_btn = gr.Button("Send")
43
-
 
44
  with gr.Tab("πŸŽ™οΈ Voice"):
45
- voice_input = gr.Audio(sources=["microphone", "upload"], type="filepath")
46
  voice_user = gr.Textbox(label="User ID", value="123")
47
  voice_output = gr.JSON(label="Analysis Results")
48
  voice_btn = gr.Button("Process")
49
-
 
50
  with gr.Tab("πŸ–ΌοΈ Images"):
51
  with gr.Tab("Describe"):
52
- img_input = gr.Image(type="filepath")
53
  img_user = gr.Textbox(label="User ID", value="123")
54
  img_output = gr.Textbox(label="Description")
55
  img_btn = gr.Button("Describe")
56
-
 
57
  with gr.Tab("Generate"):
58
  gen_prompt = gr.Textbox(label="Prompt")
59
  gen_user = gr.Textbox(label="User ID", value="123")
60
  gen_output = gr.Image(label="Generated Image")
61
  gen_btn = gr.Button("Generate")
62
-
63
- # Event handlers
64
- chat_btn.click(chat, [chat_input, user_id, lang], chat_output)
65
- voice_btn.click(process_voice, [voice_input, voice_user], voice_output)
66
- img_btn.click(process_image, [img_input, img_user], img_output)
67
- gen_btn.click(generate_image, [gen_prompt, gen_user], gen_output)
 
 
68
 
69
  if __name__ == "__main__":
70
- app.launch(
71
- server_name="0.0.0.0",
72
- server_port=7860,
73
- share=False
74
- )
 
1
  import os
2
+ import shutil
3
+ import asyncio
4
  import gradio as gr
5
  from multimodal_module import MultiModalChatModule
 
6
 
7
  # Initialize module
8
  mm = MultiModalChatModule()
9
 
10
+ # Environment configuration (already safe but keep)
11
  os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
12
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
13
 
14
+ # A tiny async-compatible "file-like" wrapper so your multimodal_module methods
15
+ # (which expect objects with an async download_to_drive(...) method) work
16
class AsyncPathWrapper:
    """Adapt a local filesystem path to the async file interface expected
    by MultiModalChatModule.

    Gradio hands callbacks a plain filepath string, while the multimodal
    module's methods appear to expect Telegram-style file objects exposing
    an async ``download_to_drive(dst)`` coroutine.  This shim bridges the
    two: "downloading" is just a local copy.
    """

    def __init__(self, path: str):
        # Path of the already-local source file (nothing is actually downloaded).
        self.path = path

    async def download_to_drive(self, dst_path: str) -> None:
        """Copy the wrapped file to *dst_path*, creating parent directories.

        The copy is synchronous but wrapped in a coroutine to satisfy the
        caller's ``await`` contract.  OSError/shutil errors propagate to the
        caller for upper-level handling (no need to catch-and-reraise here).
        """
        dst_dir = os.path.dirname(dst_path)
        # BUG FIX: dirname("") for a bare filename made makedirs("") raise
        # FileNotFoundError; only create directories when there are any.
        if dst_dir:
            os.makedirs(dst_dir, exist_ok=True)
        shutil.copy(self.path, dst_path)
28
+
29
+ # Helper to call async methods from sync Gradio callbacks
30
def run_async(fn, *args, **kwargs):
    """Drive the coroutine function *fn* to completion and return its result.

    Gradio callbacks are synchronous; this helper bridges into the module's
    async API by running a fresh event loop for each call.
    """
    coroutine = fn(*args, **kwargs)
    return asyncio.run(coroutine)
32
+
33
+ # Wrappers that adapt Gradio returned file paths to the module's expected interface
34
+ def _wrap_audio(audio_path):
35
+ if not audio_path:
36
+ return None
37
+ return AsyncPathWrapper(audio_path)
38
+
39
+ def _wrap_image(image_path):
40
+ if not image_path:
41
+ return None
42
+ return AsyncPathWrapper(image_path)
43
+
44
+ def _wrap_file(file_path):
45
+ if not file_path:
46
+ return None
47
+ return AsyncPathWrapper(file_path)
48
 
49
+ # Gradio binding functions
50
def process_voice(audio_filepath, user_id):
    """Gradio callback: run voice analysis on a recorded/uploaded audio file.

    mm.process_voice_message expects an object exposing an async
    download_to_drive method, hence the path wrapper.
    """
    wrapped_audio = _wrap_audio(audio_filepath)
    uid = int(user_id)
    return run_async(mm.process_voice_message, wrapped_audio, uid)
54
 
55
def process_image(image_filepath, user_id):
    """Gradio callback: describe an uploaded image via the multimodal module."""
    wrapped_image = _wrap_image(image_filepath)
    uid = int(user_id)
    return run_async(mm.process_image_message, wrapped_image, uid)
58
 
59
def chat(text, user_id, lang):
    """Gradio callback: generate a text response in the requested language."""
    uid = int(user_id)
    return run_async(mm.generate_response, text, uid, lang)
61
 
62
def generate_image(prompt, user_id):
    """Gradio callback: create an image from a text prompt."""
    uid = int(user_id)
    return run_async(mm.generate_image_from_text, prompt, uid)
64
+
65
def process_file(file_path, user_id):
    """Gradio callback: run document processing on an uploaded file."""
    wrapped_file = _wrap_file(file_path)
    uid = int(user_id)
    return run_async(mm.process_file, wrapped_file, uid)
68
 
69
# UI layout: one tab per modality, each wiring its button to the matching
# sync callback defined above.
with gr.Blocks(title="Multimodal AI Assistant") as app:
    gr.Markdown("## πŸš€ Multimodal AI Assistant (Space-friendly)")

    with gr.Tab("πŸ’¬ Text Chat"):
        with gr.Row():
            user_id_txt = gr.Textbox(label="User ID", value="123")
            lang = gr.Dropdown(["en", "es", "fr", "de"], label="Language", value="en")
        chat_input = gr.Textbox(label="Your Message")
        chat_output = gr.Textbox(label="AI Response", interactive=False)
        chat_btn = gr.Button("Send")
        chat_btn.click(fn=chat, inputs=[chat_input, user_id_txt, lang], outputs=chat_output)

    with gr.Tab("πŸŽ™οΈ Voice"):
        # BUG FIX: Gradio 4.x renamed `source` to `sources` (a list); the
        # previous revision of this file already used sources=["microphone",
        # "upload"], so `source="microphone"` both raises TypeError on Gradio 4
        # and silently dropped the upload option. Restore the modern form.
        voice_input = gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",
            label="Speak or upload an audio file",
        )
        voice_user = gr.Textbox(label="User ID", value="123")
        voice_output = gr.JSON(label="Analysis Results")
        voice_btn = gr.Button("Process")
        voice_btn.click(fn=process_voice, inputs=[voice_input, voice_user], outputs=voice_output)

    with gr.Tab("πŸ–ΌοΈ Images"):
        with gr.Tab("Describe"):
            img_input = gr.Image(type="filepath", label="Upload an image")
            img_user = gr.Textbox(label="User ID", value="123")
            img_output = gr.Textbox(label="Description")
            img_btn = gr.Button("Describe")
            img_btn.click(fn=process_image, inputs=[img_input, img_user], outputs=img_output)

        with gr.Tab("Generate"):
            gen_prompt = gr.Textbox(label="Prompt")
            gen_user = gr.Textbox(label="User ID", value="123")
            gen_output = gr.Image(label="Generated Image")
            gen_btn = gr.Button("Generate")
            gen_btn.click(fn=generate_image, inputs=[gen_prompt, gen_user], outputs=gen_output)

    with gr.Tab("πŸ“„ Files"):
        file_input = gr.File(file_count="single", label="Upload a document (pdf, txt, docx)")
        file_user = gr.Textbox(label="User ID", value="123")
        file_output = gr.JSON(label="File Processing Result")
        file_btn = gr.Button("Process File")
        file_btn.click(fn=process_file, inputs=[file_input, file_user], outputs=file_output)
109
 
110
if __name__ == "__main__":
    # Hugging Face Spaces injects PORT; fall back to 7860 for local runs.
    port = int(os.environ.get("PORT", 7860))
    app.launch(server_name="0.0.0.0", server_port=port)