Spaces:
Running
Running
File size: 4,515 Bytes
02ab8b4 1afccba 270b829 4ce8649 270b829 1afccba 4ce8649 270b829 1afccba 270b829 1afccba 270b829 1afccba 270b829 4ce8649 1afccba 270b829 4ce8649 1afccba 270b829 4ce8649 1afccba 4ce8649 1afccba 4ce8649 1afccba 4ce8649 1afccba 4ce8649 1afccba 4ce8649 1afccba 4ce8649 1afccba 4ce8649 1afccba 4ce8649 1afccba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
import os
import shutil
import asyncio
import gradio as gr
from multimodal_module import MultiModalChatModule
# Environment configuration.
# These flags must be exported BEFORE the multimodal backend is instantiated,
# otherwise HF hub / tokenizers may already have read their defaults by the
# time we set them (the original code instantiated the module first).
os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Initialize the multimodal backend once, at import time.
mm = MultiModalChatModule()
# A tiny async-compatible "file-like" wrapper so your multimodal_module methods
# (which expect objects with an async download_to_drive(...) method) work
class AsyncPathWrapper:
    """Minimal async adapter around a local filesystem path.

    The multimodal_module methods expect objects exposing an async
    ``download_to_drive(dst_path)`` method (telegram-bot style). This wrapper
    provides that contract for plain file paths returned by Gradio widgets.
    """

    def __init__(self, path: str):
        # Source path of the uploaded/recorded file on local disk.
        self.path = path

    async def download_to_drive(self, dst_path: str) -> None:
        """Copy the wrapped file to *dst_path*.

        The copy is performed synchronously; the coroutine signature exists
        only to satisfy the caller's async interface. Any OSError propagates
        to the caller for upper-level error handling.
        """
        # Bug fix: os.makedirs("") raises FileNotFoundError, so only create
        # the parent directory when dst_path actually contains one.
        parent = os.path.dirname(dst_path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        shutil.copy(self.path, dst_path)
# Helper to call async methods from sync Gradio callbacks
def run_async(fn, *args, **kwargs):
    """Drive the async callable *fn* to completion and return its result.

    Bridges Gradio's synchronous callback model to the async multimodal API.
    """
    coroutine = fn(*args, **kwargs)
    return asyncio.run(coroutine)
# Wrappers that adapt Gradio returned file paths to the module's expected interface
def _wrap_audio(audio_path):
    """Adapt a Gradio audio filepath to the async download interface (None passes through)."""
    return AsyncPathWrapper(audio_path) if audio_path else None
def _wrap_image(image_path):
    """Adapt a Gradio image filepath to the async download interface (None passes through)."""
    return AsyncPathWrapper(image_path) if image_path else None
def _wrap_file(file_path):
    """Adapt a Gradio uploaded-file path to the async download interface (None passes through)."""
    return AsyncPathWrapper(file_path) if file_path else None
# Gradio binding functions
def process_voice(audio_filepath, user_id):
    """Gradio callback: analyze a recorded/uploaded audio file for *user_id*.

    mm.process_voice_message expects an object with an async
    download_to_drive method, hence the wrapping step.
    """
    payload = _wrap_audio(audio_filepath)
    uid = int(user_id)
    return run_async(mm.process_voice_message, payload, uid)
def process_image(image_filepath, user_id):
    """Gradio callback: describe an uploaded image for *user_id*."""
    payload = _wrap_image(image_filepath)
    uid = int(user_id)
    return run_async(mm.process_image_message, payload, uid)
def chat(text, user_id, lang):
    """Gradio callback: generate a text reply for *user_id* in language *lang*."""
    uid = int(user_id)
    return run_async(mm.generate_response, text, uid, lang)
def generate_image(prompt, user_id):
    """Gradio callback: render an image from a text *prompt* for *user_id*."""
    uid = int(user_id)
    return run_async(mm.generate_image_from_text, prompt, uid)
def process_file(file_path, user_id):
    """Gradio callback: process an uploaded document for *user_id*."""
    payload = _wrap_file(file_path)
    uid = int(user_id)
    return run_async(mm.process_file, payload, uid)
# --- Gradio UI -------------------------------------------------------------
# NOTE(review): the original tab/heading labels were mojibake (double-encoded
# UTF-8 emoji, e.g. "π¬"); re-encoded below to the most plausible originals.
with gr.Blocks(title="Multimodal AI Assistant") as app:
    gr.Markdown("## 🚀 Multimodal AI Assistant (Space-friendly)")

    with gr.Tab("💬 Text Chat"):
        with gr.Row():
            user_id_txt = gr.Textbox(label="User ID", value="123")
            lang = gr.Dropdown(["en", "es", "fr", "de"], label="Language", value="en")
        chat_input = gr.Textbox(label="Your Message")
        chat_output = gr.Textbox(label="AI Response", interactive=False)
        chat_btn = gr.Button("Send")
        chat_btn.click(fn=chat, inputs=[chat_input, user_id_txt, lang], outputs=chat_output)

    with gr.Tab("🎙️ Voice"):
        # NOTE(review): `source=` was renamed `sources=[...]` in Gradio 4.x;
        # kept as-is because the file appears to target Gradio 3.
        voice_input = gr.Audio(source="microphone", type="filepath", label="Speak or upload an audio file")
        voice_user = gr.Textbox(label="User ID", value="123")
        voice_output = gr.JSON(label="Analysis Results")
        voice_btn = gr.Button("Process")
        voice_btn.click(fn=process_voice, inputs=[voice_input, voice_user], outputs=voice_output)

    with gr.Tab("🖼️ Images"):
        with gr.Tab("Describe"):
            img_input = gr.Image(type="filepath", label="Upload an image")
            img_user = gr.Textbox(label="User ID", value="123")
            img_output = gr.Textbox(label="Description")
            img_btn = gr.Button("Describe")
            img_btn.click(fn=process_image, inputs=[img_input, img_user], outputs=img_output)
        with gr.Tab("Generate"):
            gen_prompt = gr.Textbox(label="Prompt")
            gen_user = gr.Textbox(label="User ID", value="123")
            gen_output = gr.Image(label="Generated Image")
            gen_btn = gr.Button("Generate")
            gen_btn.click(fn=generate_image, inputs=[gen_prompt, gen_user], outputs=gen_output)

    with gr.Tab("📁 Files"):
        file_input = gr.File(file_count="single", label="Upload a document (pdf, txt, docx)")
        file_user = gr.Textbox(label="User ID", value="123")
        file_output = gr.JSON(label="File Processing Result")
        file_btn = gr.Button("Process File")
        file_btn.click(fn=process_file, inputs=[file_input, file_user], outputs=file_output)
if __name__ == "__main__":
    # Hugging Face Spaces injects PORT; fall back to 7860 for local runs.
    port = int(os.environ.get("PORT", 7860))
    app.launch(server_name="0.0.0.0", server_port=port)