Merge branch 'feature/unique-session-ids' of https://github.com/DeltaZN/gradio-mcp-hackaton into feature/unique-session-ids
- src/audio/audio_generator.py +39 -30
- src/config.py +1 -1
- src/images/image_generator.py +3 -3
- src/main.py +9 -7
src/audio/audio_generator.py
CHANGED
@@ -5,18 +5,23 @@ import queue
 import logging
 import io
 import time
+from config import settings
+from services.google import GoogleClientFactory
 
 logger = logging.getLogger(__name__)
 
+
+
 
 async def generate_music(user_hash: str, music_tone: str, receive_audio):
     if user_hash in sessions:
+        logger.info(
+            f"Music generation already started for user hash {user_hash}, skipping new generation"
+        )
         return
     async with GoogleClientFactory.audio() as client:
         async with (
+            client.live.music.connect(model="models/lyria-realtime-exp") as session,
             asyncio.TaskGroup() as tg,
         ):
             # Set up task to receive server messages.
@@ -27,26 +32,24 @@ async def generate_music(user_hash: str, music_tone: str, receive_audio):
                 session.set_weighted_prompts(
                     prompts=[types.WeightedPrompt(text=music_tone, weight=1.0)]
                 ),
+                settings.request_timeout,
             )
             await asyncio.wait_for(
                 session.set_music_generation_config(
                     config=types.LiveMusicGenerationConfig(bpm=90, temperature=1.0)
+                ),
+                settings.request_timeout,
             )
+            await asyncio.wait_for(session.play(), settings.request_timeout)
             logger.info(
                 f"Started music generation for user hash {user_hash}, music tone: {music_tone}"
             )
+            sessions[user_hash] = {"session": session, "queue": queue.Queue()}
+
+
 async def change_music_tone(user_hash: str, new_tone):
     logger.info(f"Changing music tone to {new_tone}")
+    session = sessions.get(user_hash, {}).get("session")
     if not session:
         logger.error(f"No session found for user hash {user_hash}")
         return
@@ -54,14 +57,15 @@ async def change_music_tone(user_hash: str, new_tone):
         session.set_weighted_prompts(
             prompts=[types.WeightedPrompt(text=new_tone, weight=1.0)]
         ),
+        settings.request_timeout,
     )
+
 
 SAMPLE_RATE = 48000
 NUM_CHANNELS = 2  # Stereo
 SAMPLE_WIDTH = 2  # 16-bit audio -> 2 bytes per sample
 
+
 async def receive_audio(session, user_hash):
     """Process incoming audio from the music generation."""
     while True:
@@ -69,7 +73,7 @@ async def receive_audio(session, user_hash):
             async for message in session.receive():
                 if message.server_content and message.server_content.audio_chunks:
                     audio_data = message.server_content.audio_chunks[0].data
+                    queue = sessions[user_hash]["queue"]
                     # audio_data is already bytes (raw PCM)
                     await asyncio.to_thread(queue.put, audio_data)
                     await asyncio.sleep(10**-12)
@@ -77,42 +81,47 @@ async def receive_audio(session, user_hash):
             logger.error(f"Error in receive_audio: {e}")
             break
 
+
 sessions = {}
 
+
 async def start_music_generation(user_hash: str, music_tone: str):
     """Start the music generation in a separate thread."""
     await generate_music(user_hash, music_tone, receive_audio)
+
+
 async def cleanup_music_session(user_hash: str):
     if user_hash in sessions:
         logger.info(f"Cleaning up music session for user hash {user_hash}")
+        session = sessions[user_hash]["session"]
+        await asyncio.wait_for(session.stop(), settings.request_timeout)
+        await asyncio.wait_for(session.close(), settings.request_timeout)
         del sessions[user_hash]
+
 
 def update_audio(user_hash):
     """Continuously stream audio from the queue as WAV bytes."""
     if user_hash == "":
         return
+
     logger.info(f"Starting audio update loop for user hash: {user_hash}")
     while True:
         if user_hash not in sessions:
             time.sleep(0.5)
             continue
+        queue = sessions[user_hash]["queue"]
+        pcm_data = queue.get()  # This is raw PCM audio bytes
+
         if not isinstance(pcm_data, bytes):
+            logger.warning(
+                f"Expected bytes from audio_queue, got {type(pcm_data)}. Skipping."
+            )
             continue
 
         # Lyria provides stereo, 16-bit PCM at 48kHz.
         # Ensure the number of bytes is consistent with stereo 16-bit audio.
         # Each frame = NUM_CHANNELS * SAMPLE_WIDTH bytes.
+        # If len(pcm_data) is not a multiple of (NUM_CHANNELS * SAMPLE_WIDTH),
         # it might indicate an incomplete chunk or an issue.
         bytes_per_frame = NUM_CHANNELS * SAMPLE_WIDTH
         if len(pcm_data) % bytes_per_frame != 0:
@@ -121,12 +130,12 @@ def update_audio(user_hash):
                 f"bytes_per_frame ({bytes_per_frame}). This might cause issues with WAV formatting."
             )
             # Depending on strictness, you might want to skip this chunk:
+            # continue
 
         wav_buffer = io.BytesIO()
+        with wave.open(wav_buffer, "wb") as wf:
             wf.setnchannels(NUM_CHANNELS)
+            wf.setsampwidth(SAMPLE_WIDTH)  # Corresponds to 16-bit audio
             wf.setframerate(SAMPLE_RATE)
             wf.writeframes(pcm_data)
         wav_bytes = wav_buffer.getvalue()
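The pattern behind these changes is a per-user registry (sessions[user_hash]) that holds the live Lyria session plus a thread-safe queue.Queue bridging the async receiver and the synchronous Gradio streaming loop, with every session call now bounded by asyncio.wait_for(..., settings.request_timeout). A minimal sketch of that bridge, under the same message shape as the diff; REGISTRY, produce_pcm and consume_as_wav are illustrative names, not part of the repository:

import asyncio
import io
import queue
import wave

REGISTRY: dict[str, dict] = {}   # user_hash -> {"session": ..., "queue": queue.Queue()}

async def produce_pcm(user_hash: str, session) -> None:
    """Async side: push raw PCM chunks from the live session into the user's queue."""
    q = REGISTRY[user_hash]["queue"]
    async for message in session.receive():
        if message.server_content and message.server_content.audio_chunks:
            pcm = message.server_content.audio_chunks[0].data
            await asyncio.to_thread(q.put, pcm)  # never block the event loop on put()

def consume_as_wav(user_hash: str) -> bytes:
    """Sync side (Gradio worker thread): block on the queue and wrap one chunk as WAV."""
    pcm = REGISTRY[user_hash]["queue"].get()
    buf = io.BytesIO()
    with wave.open(buf, "wb") as wf:
        wf.setnchannels(2)      # stereo
        wf.setsampwidth(2)      # 16-bit samples
        wf.setframerate(48000)  # Lyria output rate
        wf.writeframes(pcm)
    return buf.getvalue()

Using queue.Queue rather than asyncio.Queue is the point of the design: the consumer runs in a plain worker thread driven by Gradio's streaming generator, so a blocking get() is safe there, while asyncio.to_thread keeps the producer's event loop responsive.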
src/config.py
CHANGED
@@ -29,6 +29,6 @@ class AppSettings(BaseAppSettings):
     top_p: float = 0.95
     temperature: float = 0.5
     pregenerate_next_scene: bool = True
+    request_timeout: int = 20
 
 settings = AppSettings()
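The single addition, request_timeout: int = 20, is passed straight to asyncio.wait_for at the call sites above, so its unit is seconds. A minimal sketch of that contract; bounded is an illustrative helper, not part of the repository:

import asyncio
from config import settings

async def bounded(coro):
    # settings.request_timeout defaults to 20; asyncio.wait_for interprets it as seconds
    # and raises asyncio.TimeoutError if the wrapped call does not finish in time.
    return await asyncio.wait_for(coro, settings.request_timeout)

# Usage at a call site, e.g.: await bounded(session.play())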
src/images/image_generator.py
CHANGED
@@ -6,7 +6,7 @@ from datetime import datetime
 import logging
 import asyncio
 import gradio as gr
+from config import settings
 from services.google import GoogleClientFactory
 
 logger = logging.getLogger(__name__)
@@ -58,7 +58,7 @@ async def generate_image(prompt: str) -> tuple[str, str] | None:
                 safety_settings=safety_settings,
             ),
         ),
+        settings.request_timeout,
     )
 
     # Process the response parts
@@ -125,7 +125,7 @@ async def modify_image(image_path: str, modification_prompt: str) -> str | None:
                 safety_settings=safety_settings,
             ),
         ),
+        settings.request_timeout,
     )
 
     # Process the response parts
src/main.py
CHANGED
@@ -136,7 +136,7 @@ with gr.Blocks(
     with gr.Column(visible=False, elem_id="loading-indicator") as loading_indicator:
         gr.HTML("<div class='loading-text'>🚀 Starting your adventure...</div>")
 
+    ls_user_hash = gr.BrowserState("", "user_hash")
 
     # Constructor Interface (visible by default)
     with gr.Column(
@@ -313,7 +313,7 @@ with gr.Blocks(
     start_btn.click(
         fn=start_game_with_music,
         inputs=[
+            ls_user_hash,
             setting_description,
             char_name,
             char_age,
@@ -330,13 +330,14 @@ with gr.Blocks(
             game_image,
             game_choices,
             custom_choice,
+            ls_user_hash,
         ],
         concurrency_limit=CONCURRENCY_LIMIT,
     )
 
     back_btn.click(
         fn=return_to_constructor,
+        inputs=[ls_user_hash],
         outputs=[
             loading_indicator,
             constructor_interface,
@@ -347,7 +348,7 @@ with gr.Blocks(
 
     custom_choice.submit(
         fn=update_scene,
+        inputs=[ls_user_hash, custom_choice],
         outputs=[game_text, game_image, game_choices, custom_choice],
         concurrency_limit=CONCURRENCY_LIMIT,
     )
@@ -356,14 +357,15 @@ with gr.Blocks(
     demo.load(
         fn=generate_user_hash,
         inputs=[],
+        outputs=[ls_user_hash],
     )
+    ls_user_hash.change(
         fn=update_audio,
+        inputs=[ls_user_hash],
         outputs=[audio_out],
         concurrency_limit=CONCURRENCY_LIMIT,
     )
 
+
 demo.queue()
 demo.launch(ssr_mode=False)
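The wiring above generates a hash on page load, stores it client-side in the browser's local storage under the "user_hash" key via gr.BrowserState, and uses its change event (fired when demo.load writes the new value) to start the per-user audio stream. A minimal, self-contained sketch of that pattern, assuming a Gradio release that provides gr.BrowserState; generate_user_hash and update_audio are stand-ins for the app's real callbacks:

# Sketch only: mirrors the BrowserState -> change-event wiring from the diff,
# with placeholder callbacks instead of the app's real ones.
import uuid
import gradio as gr

def generate_user_hash() -> str:
    return uuid.uuid4().hex

def update_audio(user_hash: str):
    # Placeholder for the real generator that yields WAV-encoded chunks for this user.
    yield from ()

with gr.Blocks() as demo:
    audio_out = gr.Audio(streaming=True, autoplay=True)
    # Value is kept in the browser's local storage under the key "user_hash".
    ls_user_hash = gr.BrowserState("", "user_hash")

    demo.load(fn=generate_user_hash, inputs=[], outputs=[ls_user_hash])
    # Writing the hash on load fires .change, which starts this browser's audio stream.
    ls_user_hash.change(fn=update_audio, inputs=[ls_user_hash], outputs=[audio_out])

demo.queue()
demo.launch()

Routing the stream through the change event, rather than a second demo.load, ties the audio generator to the per-browser hash value, so each tab pulls from its own queue in sessions.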