Spaces:

Agents-MCP-Hackathon
/

LLMGameHub

Running

App Files Community

gsavin committed on Jun 2

Commit

a277e33

1 Parent(s): 939ce2b

fix: partial audio fix

Browse files

Files changed (3) hide show

src/audio/audio_generator.py +63 -45
src/game_constructor.py +3 -4
src/main.py +9 -6

src/audio/audio_generator.py CHANGED Viewed

@@ -2,26 +2,24 @@ import asyncio
 from google import genai
 from google.genai import types
 from config import settings
-import os
-import tempfile
 import wave
-import numpy as np
 import queue
 import logging
 import gradio as gr
 logger = logging.getLogger(__name__)
 client = genai.Client(api_key=settings.gemini_api_key.get_secret_value(), http_options={'api_version': 'v1alpha'})
-audio_queue = queue.Queue(maxsize=1)
-async def generate_music(request: gr.Request, music_tone: str, receive_audio):
       async with (
         client.aio.live.music.connect(model='models/lyria-realtime-exp') as session,
         asyncio.TaskGroup() as tg,
       ):
         # Set up task to receive server messages.
-        tg.create_task(receive_audio(session))
         # Send initial prompts and config
         await session.set_weighted_prompts(
@@ -33,14 +31,18 @@ async def generate_music(request: gr.Request, music_tone: str, receive_audio):
           config=types.LiveMusicGenerationConfig(bpm=90, temperature=1.0)
         )
         await session.play()
-        logger.info(f"Started music generation for session {request.session_hash}, music tone: {music_tone}")
-        sessions[request.session_hash] = session
-async def change_music_tone(request: gr.Request, new_tone):
     logger.info(f"Changing music tone to {new_tone}")
-    session = sessions.get(request.session_hash)
     if not session:
-        logger.error(f"No session found for request {request.session_hash}")
         return
     await session.reset_context()
     await session.set_weighted_prompts(
@@ -49,55 +51,71 @@ async def change_music_tone(request: gr.Request, new_tone):
 SAMPLE_RATE = 48000
-async def receive_audio(session):
     """Process incoming audio from the music generation."""
     while True:
         try:
             async for message in session.receive():
                 if message.server_content and message.server_content.audio_chunks:
                     audio_data = message.server_content.audio_chunks[0].data
-                    await asyncio.to_thread(audio_queue.put, audio_data)
                 await asyncio.sleep(10**-12)
         except Exception as e:
             logger.error(f"Error in receive_audio: {e}")
-            await asyncio.sleep(1)
 sessions = {}
-async def start_music_generation(request: gr.Request, music_tone: str):
     """Start the music generation in a separate thread."""
-    await generate_music(request, music_tone, receive_audio)
-async def cleanup_music_session(request: gr.Request):
-    if request.session_hash in sessions:
-        logger.info(f"Cleaning up music session for session {request.session_hash}")
-        await sessions[request.session_hash].stop()
-        del sessions[request.session_hash]
-current_audio_file = None
-def update_audio():
-    """Continuously stream audio from the queue."""
-    global current_audio_file
     while True:
-        audio_data = audio_queue.get()
-        if isinstance(audio_data, bytes):
-            audio_array = np.frombuffer(audio_data, dtype=np.int16)
-        else:
-            audio_array = np.array(audio_data, dtype=np.int16)
-        temp_fd, temp_path = tempfile.mkstemp(suffix='.wav')
-        os.close(temp_fd)
-        # Write to WAV file
-        with wave.open(temp_path, 'wb') as wav_file:
-            wav_file.setnchannels(2)  # Stereo
-            wav_file.setsampwidth(2)  # 16-bit
-            wav_file.setframerate(SAMPLE_RATE)
-            wav_file.writeframes(audio_array.tobytes())
-        if current_audio_file:
-            os.remove(current_audio_file)
-        current_audio_file = temp_path
-        yield temp_path

 from google import genai
 from google.genai import types
 from config import settings
 import wave
 import queue
 import logging
 import gradio as gr
+import io
+import time
 logger = logging.getLogger(__name__)
 client = genai.Client(api_key=settings.gemini_api_key.get_secret_value(), http_options={'api_version': 'v1alpha'})
+async def generate_music(user_hash: str, music_tone: str, receive_audio):
       async with (
         client.aio.live.music.connect(model='models/lyria-realtime-exp') as session,
         asyncio.TaskGroup() as tg,
       ):
         # Set up task to receive server messages.
+        tg.create_task(receive_audio(session, user_hash))
         # Send initial prompts and config
         await session.set_weighted_prompts(
           config=types.LiveMusicGenerationConfig(bpm=90, temperature=1.0)
         )
         await session.play()
+        logger.info(f"Started music generation for user hash {user_hash}, music tone: {music_tone}")
+        await cleanup_music_session(user_hash)
+        sessions[user_hash] = {
+            'session': session,
+            'queue': queue.Queue(maxsize=3)
+        }
+async def change_music_tone(user_hash: str, new_tone):
     logger.info(f"Changing music tone to {new_tone}")
+    session = sessions.get(user_hash, {}).get('session')
     if not session:
+        logger.error(f"No session found for user hash {user_hash}")
         return
     await session.reset_context()
     await session.set_weighted_prompts(
 SAMPLE_RATE = 48000
+NUM_CHANNELS = 2  # Stereo
+SAMPLE_WIDTH = 2  # 16-bit audio -> 2 bytes per sample
+async def receive_audio(session, user_hash):
     """Process incoming audio from the music generation."""
     while True:
         try:
             async for message in session.receive():
                 if message.server_content and message.server_content.audio_chunks:
                     audio_data = message.server_content.audio_chunks[0].data
+                    queue = sessions[user_hash]['queue']
+                    # audio_data is already bytes (raw PCM)
+                    await asyncio.to_thread(queue.put, audio_data)
                 await asyncio.sleep(10**-12)
         except Exception as e:
             logger.error(f"Error in receive_audio: {e}")
+            break
 sessions = {}
+async def start_music_generation(user_hash: str, music_tone: str):
     """Start the music generation in a separate thread."""
+    await generate_music(user_hash, music_tone, receive_audio)
+async def cleanup_music_session(user_hash: str):
+    if user_hash in sessions:
+        logger.info(f"Cleaning up music session for user hash {user_hash}")
+        session = sessions[user_hash]['session']
+        await session.stop()
+        await session.close()
+        del sessions[user_hash]
+def update_audio(user_hash):
+    """Continuously stream audio from the queue as WAV bytes."""
     while True:
+        if user_hash not in sessions:
+            time.sleep(0.5)
+            continue
+        queue = sessions[user_hash]['queue']
+        pcm_data = queue.get() # This is raw PCM audio bytes
+        if not isinstance(pcm_data, bytes):
+            logger.warning(f"Expected bytes from audio_queue, got {type(pcm_data)}. Skipping.")
+            continue
+        # Lyria provides stereo, 16-bit PCM at 48kHz.
+        # Ensure the number of bytes is consistent with stereo 16-bit audio.
+        # Each frame = NUM_CHANNELS * SAMPLE_WIDTH bytes.
+        # If len(pcm_data) is not a multiple of (NUM_CHANNELS * SAMPLE_WIDTH),
+        # it might indicate an incomplete chunk or an issue.
+        bytes_per_frame = NUM_CHANNELS * SAMPLE_WIDTH
+        if len(pcm_data) % bytes_per_frame != 0:
+            logger.warning(
+                f"Received PCM data with length {len(pcm_data)}, which is not a multiple of "
+                f"bytes_per_frame ({bytes_per_frame}). This might cause issues with WAV formatting."
+            )
+            # Depending on strictness, you might want to skip this chunk:
+            # continue
+        wav_buffer = io.BytesIO()
+        with wave.open(wav_buffer, 'wb') as wf:
+            wf.setnchannels(NUM_CHANNELS)
+            wf.setsampwidth(SAMPLE_WIDTH) # Corresponds to 16-bit audio
+            wf.setframerate(SAMPLE_RATE)
+            wf.writeframes(pcm_data)
+        wav_bytes = wav_buffer.getvalue()
+        yield wav_bytes

src/game_constructor.py CHANGED Viewed

@@ -108,7 +108,7 @@ def save_game_config(
         return f"❌ Error saving configuration: {str(e)}"
 async def start_game_with_settings(
-    request: gr.Request,
     setting_desc: str,
     char_name: str,
     char_age: str,
@@ -160,10 +160,9 @@ NOTE FOR THE ASSISTANT: YOU HAVE TO GENERATE THE IMAGE FOR THE START SCENE.
     response = await process_user_input(initial_story)
-    music_tone = response.change_music.music_description
-    if music_tone:
-        asyncio.create_task(start_music_generation(request, music_tone))
     img = "forest.jpg"

         return f"❌ Error saving configuration: {str(e)}"
 async def start_game_with_settings(
+    user_hash: str,
     setting_desc: str,
     char_name: str,
     char_age: str,
     response = await process_user_input(initial_story)
+    music_tone = response.change_music.music_description or "neutral"
+    asyncio.create_task(start_music_generation(user_hash, music_tone))
     img = "forest.jpg"

src/main.py CHANGED Viewed

@@ -32,7 +32,7 @@ def return_to_constructor():
     )
-async def update_scene(request: gr.Request, choice):
     logger.info(f"Updating scene with choice: {choice}")
     if isinstance(choice, str):
         old_scene = state["scene"]
@@ -61,7 +61,7 @@ async def update_scene(request: gr.Request, choice):
                 story[new_scene]["image"] = img_path
         if response.change_music.change_music:
-            await change_music_tone(request, response.change_music.music_description)
     scene = story[state["scene"]]
     return (
@@ -92,7 +92,7 @@ def update_preview(setting, name, age, background, personality, genre):
 async def start_game_with_music(
-    request: gr.Request,
     setting_desc: str,
     char_name: str,
     char_age: str,
@@ -113,7 +113,7 @@ async def start_game_with_music(
     # First, get the game interface updates
     result = await start_game_with_settings(
-        request,
         setting_desc,
         char_name,
         char_age,
@@ -132,6 +132,8 @@ with gr.Blocks(
     # Fullscreen Loading Indicator (hidden by default)
     with gr.Column(visible=False, elem_id="loading-indicator") as loading_indicator:
         gr.HTML("<div class='loading-text'>🚀 Starting your adventure...</div>")
     # Constructor Interface (visible by default)
     with gr.Column(
@@ -296,6 +298,7 @@ with gr.Blocks(
     start_btn.click(
         fn=start_game_with_music,
         inputs=[
             setting_description,
             char_name,
             char_age,
@@ -327,14 +330,14 @@ with gr.Blocks(
     game_choices.change(
         fn=update_scene,
-        inputs=[game_choices],
         outputs=[game_text, game_image, game_choices],
     )
     demo.unload(cleanup_music_session)
     demo.load(
         fn=update_audio,
-        inputs=None,
         outputs=[audio_out],
     )

     )
+async def update_scene(user_hash: str, choice):
     logger.info(f"Updating scene with choice: {choice}")
     if isinstance(choice, str):
         old_scene = state["scene"]
                 story[new_scene]["image"] = img_path
         if response.change_music.change_music:
+            await change_music_tone(user_hash, response.change_music.music_description)
     scene = story[state["scene"]]
     return (
 async def start_game_with_music(
+    user_hash: str,
     setting_desc: str,
     char_name: str,
     char_age: str,
     # First, get the game interface updates
     result = await start_game_with_settings(
+        user_hash,
         setting_desc,
         char_name,
         char_age,
     # Fullscreen Loading Indicator (hidden by default)
     with gr.Column(visible=False, elem_id="loading-indicator") as loading_indicator:
         gr.HTML("<div class='loading-text'>🚀 Starting your adventure...</div>")
+    local_storage = gr.BrowserState(str(uuid.uuid4()), "user_hash")
     # Constructor Interface (visible by default)
     with gr.Column(
     start_btn.click(
         fn=start_game_with_music,
         inputs=[
+            local_storage,
             setting_description,
             char_name,
             char_age,
     game_choices.change(
         fn=update_scene,
+        inputs=[local_storage, game_choices],
         outputs=[game_text, game_image, game_choices],
     )
     demo.unload(cleanup_music_session)
     demo.load(
         fn=update_audio,
+        inputs=[local_storage],
         outputs=[audio_out],
     )